Package Loading and Data Importing

Load Packages and Custom Functions

# BiocManager::install("mixOmicsTeam/mixOmics")
# remotes::install_github("ying14/yingtools2")
# remotes::install_github("mikabr/ggpirate")
# devtools::install_github("EmilHvitfeldt/paletteer")

library(pacman)
p_load(
  factoextra,               # Dimension reduction
  FactoMineR,               # Dimension reduction
  stabs,                    # Stability variable selection
  mboost,                   # Gradient boosting for model building
  ggrepel,                  # Visualization, repels labels on plots
  bestglm,                  # Logistic regression
  knitr,                    # To change R markdown PDF options
  caret,                    # To calculate model paramters
  cutpointr,                # Calculate cutpoints
  RPostgreSQL,              # Connect to PostgreSQL server
  phyloseq,                 # Phyloseq data wrangling
  microbiomeMarker,         # LEfSe analysis
  ComplexHeatmap,           # Heatmap generator
  paletteer,                # Extensive color palette
  tidyverse,                # Data wrangling and visualization
  circlize,                 # Color ramp builder
  rstatix,                  # Tidyverse statistics
  EnhancedVolcano,          # Volcano plot analysis
  umap,                     # Uniform Manifold Approximation Projection
  ggpubr,                   # Simple ggplots with stats
  ggpirate,                 # ggplot version of Pirate plot
  mixOmics,                 # PLS-DA
  conflicted,               # Forces conflicted packages to be used by preferred package
  gridExtra,                # Arrange multiple grobs
  reticulate,               # Run python in R
  tableone,                 # Additional clinical tables support
  lubridate,                # Manipulate date objects
  yingtools2,               # Custom plotting functions
  cowplot,                  # Combine plots together
  epiR,                     # Obtain model performance measures
  pROC,                     # Produce ROC curves
  plotrix,                  # Add table to base R plot
  glmnet,                   # LASSO regression
  forcats,                  # Factor reordering
  ggpmisc,                  # Miscellaneous ggplot helper functions
  doParallel,               # Parallelize functions (for optimizing cutpoints)
  PRISMAstatement,          # Flow table generator
  DiagrammeRsvg,            # Flow table visualizer
  rsvg,                     # Flow table visualizer
  scales,                   # Scale functions for visualizations
  devtools,                 # To load packages from GitHub
  install = F
)

library("gtsummary")  # Clinical tables, pacman had trouble loading this in, so had to go the manual route

conflict_prefer("select", "dplyr")
conflict_prefer("mutate", "dplyr")
conflict_prefer("filter", "dplyr")
conflict_prefer("rename", "dplyr")
conflict_prefer("slice", "dplyr")
conflict_prefer("between", "dplyr")
conflict_prefer("annotate", "ggplot2")

devtools::source_url("https://github.com/yingeddi2008/DFIutility/blob/master/getRdpPal.R?raw=TRUE")

# ggplot theme shortcuts
et <- element_text
eb <- element_blank
er <- element_rect
el <- element_line

opts_chunk$set(tidy.opts = list(width.cutoff = 60), tidy = TRUE)

`%!in%` <- negate(`%in%`)

# Running python in R
## User must set path to their python library
reticulate::use_python("/usr/bin/python3")

# Customized lefse function
source("./Data/run_lefse2.R")

Import Data

Load in all Metagenomic, Metabolomic and Clinical Data

# 1) Load R image load('./Data/LT_Modeling.RData')

# OR #

# 2) Individually Load R Objects

# Sample lookup
sample_lookup <- readRDS("./Data/sample_lookup.rds")

# First samples from all patients
first_samps <- readRDS("./Data/first_samps_anon.rds")

# Simplified dataframe containing infection information and
# relative abundance of targeted taxa
peri_matrix_all <- readRDS("./Data/peri_matrix_clin_all.rds")

# Distinct detailed infection data
peri_criteria_best <- readRDS("./Data/peri_criteria_best_anon.rds")

# All detailed infection data
peri_criteria_all <- readRDS("./Data/peri_criteria_all_anon.rds")

# NCBI taxonomy lookup
tax_lookup <- readRDS("./Data/tax_lookup.rds")

# Complete metaphlan dataframe for all patients and healthy
# donors
metaphlan_df <- readRDS("./Data/metaphlan_anon.rds")

# Metaphlan dataframe of patient samples
metaphlan_peri_anon <- readRDS("./Data/metaphlan_peri_anon.rds")

# Custom color palette
metaphlan_pal <- getRdpPal(metaphlan_df)

# Qualitative metabolomics
metab_qual_anon <- readRDS("./Data/metab_qual_anon.rds")

# Quantitative metabolomics
metab_quant_anon <- readRDS("./Data/metab_quant_anon.rds")

# Antibiotics data
abx <- readRDS("./Data/abx_anon.rds")

# Demographics data
demo <- readRDS("./Data/demo_anon.rds")

Figures

Figure 4C: Optimal Cutpoint Analysis: Relative Abundance

# Custom function to avoid any errors during map
safe_cutpointr <- possibly(.f = cutpointr, otherwise = "Error")

# Calculate the number of cores
no_cores <- detectCores() - 2

# create the cluster for caret to use
cl <- makePSOCKcluster(no_cores)
registerDoParallel(cl)

set.seed(123456)
test_abundance <- peri_matrix_all %>%
    mutate(sampleID = as.factor(sampleID)) %>%
    select(starts_with(c("entero", "esch", "klebs", "citro",
        "rahn", "proteus")), -ends_with("abs_abundance")) %>%
    mutate_if(is.character, as.numeric) %>%
    mutate(enterococcus_infection = ifelse(`Enterococcus faecium` +
        `Enterococcus faecalis` + `Enterococcus avium` >= 1,
        1, 0), enterobacterales_infection = ifelse(`Escherichia coli` +
        `Klebsiella pneumoniae` + `Citrobacter freundii` + `Proteus mirabilis` >=
        1, 1, 0)) %>%
    select(-c(`Enterococcus faecium`, `Enterococcus faecalis`,
        `Enterococcus avium`, `Escherichia coli`, `Klebsiella pneumoniae`,
        `Citrobacter freundii`, `Proteus mirabilis`)) %>%
    pivot_longer(-c(enterococcus_infection, enterobacterales_infection),
        names_to = "variable", values_to = "value") %>%
    pivot_longer(-c(variable, value), names_to = "infection_type",
        values_to = "infection") %>%
    mutate(variable = paste0("Input: ", variable, "\nPredict: ",
        infection_type)) %>%
    group_by(infection_type, variable) %>%
    group_map(~safe_cutpointr(., value, infection, variable,
        method = maximize_metric, metric = youden, pos_class = 1,
        boot_runs = 100, allowParallel = TRUE, na.rm = T), .keep = TRUE)

# to get cutpoint object
test_abundance[4][[1]]  # ecoc rel abd, ecoc infection
## # A tibble: 1 × 18
##   subgroup                                                             direction
##   <chr>                                                                <chr>    
## 1 "Input: enterococcus_rel_abundance\nPredict: enterococcus_infection" >=       
##   optimal_cutpoint method            youden      acc sensitivity specificity
##              <dbl> <chr>              <dbl>    <dbl>       <dbl>       <dbl>
## 1         0.199356 maximize_metric 0.577090 0.723214    0.882353    0.694737
##        AUC pos_class neg_class prevalence outcome   predictor grouping
##      <dbl>     <dbl>     <dbl>      <dbl> <chr>     <chr>     <chr>   
## 1 0.829721         1         0   0.151786 infection value     variable
##   data               roc_curve            boot               
##   <list>             <list>               <list>             
## 1 <tibble [112 × 2]> <rc_ctpnt [82 × 10]> <tibble [100 × 23]>
test_abundance[1][[1]]  # ebac rel abd, ebacc infection
## # A tibble: 1 × 18
##   subgroup                                                                    
##   <chr>                                                                       
## 1 "Input: enterobacterales_rel_abundance\nPredict: enterobacterales_infection"
##   direction optimal_cutpoint method            youden    acc sensitivity
##   <chr>                <dbl> <chr>              <dbl>  <dbl>       <dbl>
## 1 >=               0.0247691 maximize_metric 0.798077 0.8125           1
##   specificity      AUC pos_class neg_class prevalence outcome   predictor
##         <dbl>    <dbl>     <dbl>     <dbl>      <dbl> <chr>     <chr>    
## 1    0.798077 0.905048         1         0  0.0714286 infection value    
##   grouping data               roc_curve            boot               
##   <chr>    <list>             <list>               <list>             
## 1 variable <tibble [112 × 2]> <rc_ctpnt [59 × 10]> <tibble [100 × 23]>
test_abundance_unnest <- test_abundance %>%
    map_df(as_tibble)


test_abundance_unnest %>%
    separate(subgroup, into = c("Input", "Predict"), sep = "\n",
        remove = F) %>%
    filter(grepl("enterobacterales", Input) & grepl("enterobacterales",
        Predict) | grepl("enterococcus", Input) & grepl("enterococcus",
        Predict)) %>%
    group_by(pos_class) %>%
    ungroup() %>%
    unnest(roc_curve) %>%
    arrange(desc(AUC)) %>%
    select(-boot) %>%
    mutate(auc_label = paste0("AUC = ", formatC(round(AUC, 3),
        digits = 2, format = "f")), auc_label = case_when(grepl("enterococcus_rel_abundance",
        Input) ~ paste0(auc_label, " [", formatC(pull(round(boot_ci(x = test_abundance[4][[1]],
        AUC, alpha = 0.05)[3][1, ], 2)), digits = 2, format = "f"),
        ", ", formatC(pull(round(boot_ci(x = test_abundance[4][[1]],
            AUC, alpha = 0.05)[3][2, ], 2)), digits = 2, format = "f"),
        "]"), grepl("enterobacterales_rel_abundance", Input) ~
        paste0(auc_label, " [", formatC(pull(round(boot_ci(x = test_abundance[1][[1]],
            AUC, alpha = 0.05)[3][1, ], 2)), digits = 2, format = "f"),
            ", ", formatC(pull(round(boot_ci(x = test_abundance[1][[1]],
                AUC, alpha = 0.05)[3][2, ], 2)), digits = 2,
                format = "f"), "]")), acc_label = paste0("Accuracy = ",
        formatC(round(acc, 3) * 100, digits = 0, format = "f"),
        "%"), acc_label = case_when(grepl("enterococcus_rel_abundance",
        Input) ~ paste0(acc_label, " [", paste0(formatC(pull(round(boot_ci(x = test_abundance[4][[1]],
        acc, alpha = 0.05)[3][1, ], 2)) * 100, digits = 0, format = "f"),
        "%"), ", ", paste0(formatC(pull(round(boot_ci(x = test_abundance[4][[1]],
        acc, alpha = 0.05)[3][2, ], 2)) * 100, digits = 0, format = "f"),
        "%"), "]"), grepl("enterobacterales_rel_abundance", Input) ~
        paste0(acc_label, " [", paste0(formatC(pull(round(boot_ci(x = test_abundance[1][[1]],
            acc, alpha = 0.05)[3][1, ], 2)) * 100, digits = 0,
            format = "f"), "%"), ", ", paste0(formatC(pull(round(boot_ci(x = test_abundance[1][[1]],
            acc, alpha = 0.05)[3][2, ], 2)) * 100, digits = 0,
            format = "f"), "%"), "]")), sens_label = paste0("Sensitivity = ",
        formatC(round(sensitivity, 3) * 100, digits = 0, format = "f"),
        "%"), sens_label = case_when(grepl("enterococcus_rel_abundance",
        Input) ~ paste0(sens_label, " [", paste0(formatC(pull(round(boot_ci(x = test_abundance[4][[1]],
        sensitivity, alpha = 0.05)[3][1, ], 2)) * 100, digits = 0,
        format = "f"), "%"), ", ", paste0(formatC(pull(round(boot_ci(x = test_abundance[4][[1]],
        sensitivity, alpha = 0.05)[3][2, ], 2)) * 100, digits = 0,
        format = "f"), "%"), "]"), grepl("enterobacterales_rel_abundance",
        Input) ~ paste0(sens_label, " [", paste0(formatC(pull(round(boot_ci(x = test_abundance[1][[1]],
        sensitivity, alpha = 0.05)[3][1, ], 2)) * 100, digits = 0,
        format = "f"), "%"), ", ", paste0(formatC(pull(round(boot_ci(x = test_abundance[1][[1]],
        sensitivity, alpha = 0.05)[3][2, ], 2)) * 100, digits = 0,
        format = "f"), "%"), "]")), spec_label = paste0("Specificity = ",
        formatC(round(specificity, 3) * 100, digits = 0, format = "f"),
        "%"), spec_label = case_when(grepl("enterococcus_rel_abundance",
        Input) ~ paste0(spec_label, " [", paste0(formatC(pull(round(boot_ci(x = test_abundance[4][[1]],
        specificity, alpha = 0.05)[3][1, ], 2)) * 100, digits = 0,
        format = "f"), "%"), ", ", paste0(formatC(pull(round(boot_ci(x = test_abundance[4][[1]],
        specificity, alpha = 0.05)[3][2, ], 2)) * 100, digits = 0,
        format = "f"), "%"), "]"), grepl("enterobacterales_rel_abundance",
        Input) ~ paste0(spec_label, " [", paste0(formatC(pull(round(boot_ci(x = test_abundance[1][[1]],
        specificity, alpha = 0.05)[3][1, ], 2)) * 100, digits = 0,
        format = "f"), "%"), ", ", paste0(formatC(pull(round(boot_ci(x = test_abundance[1][[1]],
        specificity, alpha = 0.05)[3][2, ], 2)) * 100, digits = 0,
        format = "f"), "%"), "]"))) %>%
    filter(grepl(pattern = "rel_abundance", x = Input)) %>%
    mutate(subgroup = str_to_title(subgroup), subgroup = gsub(pattern = "_",
        replacement = " ", x = subgroup), subgroup = gsub(pattern = "rel abundance",
        replacement = "Relative Abundance (%)", x = subgroup),
        subgroup2 = ifelse(grepl(x = subgroup, pattern = "enterococcus",
            ignore.case = TRUE), "Enterococcus", "Enterobacterales"),
        subgroup2 = factor(subgroup2, levels = c("Enterococcus",
            "Enterobacterales"))) %>%
    ggplot(aes(x = fpr, y = tpr, color = subgroup2)) + geom_line(size = 1.2) +
    geom_text(aes(label = auc_label, x = 0.5, y = 0.13), show.legend = F,
        hjust = 0) + geom_text(aes(label = acc_label, x = 0.5,
    y = 0.09), show.legend = F, hjust = 0) + geom_text(aes(label = spec_label,
    x = 0.5, y = 0.05), show.legend = F, hjust = 0) + geom_text(aes(label = sens_label,
    x = 0.5, y = 0.01), show.legend = F, hjust = 0) + geom_text(aes(label = paste0("Cutpoint = ",
    round((optimal_cutpoint), digits = 3) * 100, "%"), x = 0.5,
    y = 0.17), hjust = 0, show.legend = F) + theme_bw() + theme(axis.text = et(color = "black",
    size = 12), axis.title = et(color = "black", size = 14),
    legend.text = et(size = 12), legend.title = et(size = 14),
    legend.spacing.y = unit(0.5, "cm"), legend.position = "none",
    panel.grid = eb(), strip.text = et(size = 14), strip.background = eb(),
    panel.border = eb(), panel.spacing.y = unit(20, "mm"), axis.line.x = el(color = "black")) +
    geom_vline(xintercept = -0.05) + geom_hline(yintercept = -0.05) +
    scale_x_continuous(expand = expansion(add = c(0.001, 0.05))) +
    scale_y_continuous(expand = expansion(add = c(0.001, 0.05))) +
    facet_wrap(~subgroup2, ncol = 1, scales = "fixed") + ylab("True Positive Rate\n") +
    xlab("\nFalse Positive Rate") + scale_color_manual(values = c("#2dc46b",
    "red")) + guides(color = guide_legend("Groups", byrow = T,
    override.aes = list(size = 5)))

ggsave(filename = "./Results/Figure_4C.pdf", height = 12, width = 7)


# obtain threshold cutpoints for relative abundance
optimal_cutpoint_rel <- test_abundance_unnest %>%
    separate(subgroup, into = c("Input", "Predict"), sep = "\n",
        remove = F) %>%
    filter(grepl("enterobacterales", Input) & grepl("enterobacterales",
        Predict) | grepl("enterococcus", Input) & grepl("enterococcus",
        Predict)) %>%
    group_by(pos_class) %>%
    filter(grepl("rel", subgroup)) %>%
    select(subgroup, optimal_cutpoint)

Figure 5A: Enterococcus Expansion: Volcano Analysis

# Heatmap compounds and their categories
heatmap_lookup <- read.csv("./Data/qual_heatmap_lookup.csv",
    stringsAsFactors = FALSE)

# Build heatmap compound list
heatmap_cmpds <- metab_qual_anon %>%
    mutate(compound = str_to_title(compound), compound = recode(compound,
        Preq1 = "PreQ1")) %>%
    filter(compound %in% heatmap_lookup$compound) %>%
    distinct(compound) %>%
    drop_na()

qual_log2fc_ecoc_expan <- metab_qual_anon %>%
    mutate(compound = ifelse(compound == "isovaleric-acid", "isovalerate",
        compound), compound = str_to_title(compound), compound = recode(compound,
        Preq1 = "PreQ1")) %>%
    filter(compound %in% heatmap_cmpds$compound) %>%
    left_join(peri_matrix_all %>%
        select(sampleID, enterococcus_rel_abundance)) %>%
    drop_na() %>%
    mutate(enterococcus_expansion = ifelse(enterococcus_rel_abundance >=
        optimal_cutpoint_rel$optimal_cutpoint[2], 1, 0)) %>%
    arrange(enterococcus_expansion, sampleID) %>%
    group_by(compound) %>%
    mutate(enterococcus_expansion_0 = length(mvalue[enterococcus_expansion ==
        "0"]), enterococcus_expansion_1 = length(mvalue[enterococcus_expansion ==
        "1"])) %>%
    filter(any(mvalue != 0)) %>%
    summarise(log2fc_val = log((mean(mvalue[enterococcus_expansion ==
        "0"], na.rm = T)/mean(mvalue[enterococcus_expansion ==
        "1"], na.rm = T)), base = 2))  # 0 = No Expansion, 1 = Expansion


qual_pval_ecoc_expan <- metab_qual_anon %>%
    mutate(compound = ifelse(compound == "isovaleric-acid", "isovalerate",
        compound), compound = str_to_title(compound), compound = recode(compound,
        Preq1 = "PreQ1")) %>%
    filter(compound %in% heatmap_cmpds$compound) %>%
    left_join(peri_matrix_all %>%
        select(sampleID, enterococcus_rel_abundance)) %>%
    drop_na() %>%
    mutate(enterococcus_expansion = ifelse(enterococcus_rel_abundance >=
        optimal_cutpoint_rel$optimal_cutpoint[2], 1, 0)) %>%
    arrange(enterococcus_expansion, sampleID) %>%
    group_by(compound) %>%
    mutate(enterococcus_expansion_0 = length(mvalue[enterococcus_expansion ==
        "0"]), enterococcus_expansion_1 = length(mvalue[enterococcus_expansion ==
        "1"])) %>%
    filter(any(mvalue != 0)) %>%
    rstatix::wilcox_test(mvalue ~ enterococcus_expansion) %>%
    rstatix::adjust_pvalue(method = "BH") %>%
    rstatix::add_significance("p.adj")

qual_tot_ecoc_expan <- left_join(qual_log2fc_ecoc_expan, qual_pval_ecoc_expan) %>%
    column_to_rownames(var = "compound")

# volcano label color
ecoc_expan_volcano_labcol <- qual_tot_ecoc_expan %>%
    filter(p.adj <= 0.05 & abs(log2fc_val) >= 0.75) %>%
    mutate(color = ifelse(log2fc_val < 0, "#389458", "gray47"))

# Volcano Plot (adjusted)
set.seed(456)
volcano_adj_ecoc_expan <- EnhancedVolcano(qual_tot_ecoc_expan,
    lab = rownames(qual_tot_ecoc_expan), x = "log2fc_val", y = "p.adj",
    title = NULL, pCutoff = 0.05, FCcutoff = 0.75, pointSize = 6,
    labSize = 8, axisLabSize = 32, labCol = ecoc_expan_volcano_labcol$color,
    caption = NULL, colAlpha = 0.65, col = c("gray75", c("#D4CA15",
        "#912777", "#1238E3")), xlim = c(-2.5, 4), ylim = c(0,
        8), legendPosition = "none", legendLabels = c(expression(p.adj >
        0.05 * ";" ~ Log[2] ~ FC < "±" * 0.75), expression(p.adj >
        0.05 * ";" ~ Log[2] ~ FC >= "±" * 0.75), expression(p.adj <=
        0.05 * ";" ~ Log[2] ~ FC < "±" * 0.75), expression(p.adj <=
        0.05 * ";" ~ Log[2] ~ FC >= "±" * 0.75)), legendLabSize = 14,
    boxedLabels = T, drawConnectors = T, widthConnectors = 0.2,
    arrowheads = F, gridlines.minor = F, gridlines.major = F,
    max.overlaps = Inf) + theme(axis.text = et(color = "black"),
    legend.text = et(hjust = 0), plot.margin = unit(c(0, 0, 0,
        0), "cm")) + labs(subtitle = NULL) + annotate("segment",
    x = 0.8, xend = 3, y = 7.95, yend = 7.95, arrow = arrow(),
    size = 2, color = "gray67") + annotate("text", x = 1.65,
    y = 8.15, label = "No Expansion", size = 9, color = "gray67") +
    annotate("rect", xmin = 0.75, xmax = Inf, ymin = -log(0.05,
        base = 10), ymax = Inf, alpha = 0.1, fill = "gray87") +
    annotate("segment", x = -0.8, xend = -2.5, y = 7.95, yend = 7.95,
        arrow = arrow(), size = 2, color = "#389458") + annotate("text",
    x = -1.55, y = 8.15, label = "Expansion", size = 9, color = "#389458") +
    annotate("rect", xmin = -0.75, xmax = -Inf, ymin = -log(0.05,
        base = 10), ymax = Inf, alpha = 0.1, fill = "#389458") +
    guides(color = guide_legend(nrow = 4), shape = guide_legend(nrow = 4)) +
    scale_y_continuous(expand = expansion(add = c(0, 0.15)))

volcano_adj_ecoc_expan

ggsave(plot = volcano_adj_ecoc_expan, filename = "./Results/Figure_5A.pdf",
    width = 24, height = 11)

Enterococcus Expansion: Distribution

# Using unadjusted p-values for down-selection of
# metabolites, show distribution of normalized peak area
boxplot_ecoc_expan <- metab_qual_anon %>%
    mutate(compound = ifelse(compound == "isovaleric-acid", "isovalerate",
        compound), compound = str_to_title(compound), compound = recode(compound,
        Preq1 = "PreQ1")) %>%
    filter(compound %in% heatmap_cmpds$compound) %>%
    left_join(peri_matrix_all %>%
        select(sampleID, enterococcus_rel_abundance)) %>%
    drop_na() %>%
    mutate(enterococcus_expansion = ifelse(enterococcus_rel_abundance >=
        optimal_cutpoint_rel$optimal_cutpoint[2], 1, 0), enterococcus_expansion = ifelse(enterococcus_expansion ==
        1, "Expansion", "No Expansion")) %>%
    arrange(enterococcus_expansion, sampleID) %>%
    group_by(compound) %>%
    mutate(enterococcus_expansion_0 = length(mvalue[enterococcus_expansion ==
        "No Expansion"]), enterococcus_expansion_1 = length(mvalue[enterococcus_expansion ==
        "Expansion"])) %>%
    filter(any(mvalue != 0)) %>%
    right_join(qual_tot_ecoc_expan %>%
        rownames_to_column(var = "compound") %>%
        mutate(abs_log2fc_val = abs(log2fc_val)) %>%
        filter(p <= 0.05, abs_log2fc_val >= 1))

if (nrow(boxplot_ecoc_expan) > 0) {
    print(ggpar(ggboxplot(boxplot_ecoc_expan, x = "enterococcus_expansion",
        y = "mvalue", color = "enterococcus_expansion", palette = c("#389458",
            "gray87"), ylab = "Normalized Peak Area", xlab = "",
        outlier.shape = NA), legend.title = "Enterococcus") +
        stat_compare_means(label.y.npc = 0.75) + facet_wrap(~compound,
        scales = "free_y") + geom_point(data = boxplot_ecoc_expan,
        aes(x = enterococcus_expansion, y = mvalue, color = enterococcus_expansion),
        position = position_jitter(width = 0.2), size = 2, alpha = 0.65))

    ggsave(filename = "./Results/enterococcus_expansion_boxplot.pdf",
        width = 24, height = 18)
} else {
    print("no significant observations")
}

taxUMAP Data Preparation

# Custom colors
pirate_colors <- c("#1A49BE", "#3A001E")
pirate_colors2 <- c("#3A001E", "#3A001E", "#3A001E", "#1A49BE")

t_metaphlan <- metaphlan_df %>%
    filter(sampleID %in% first_samps$sampleID | grepl(sampleID,
        pattern = "hd")) %>%
    mutate(db = ifelse(grepl(sampleID, pattern = "lt"), "Liver Transplant",
        "Healthy Donor")) %>%
    select(sampleID, taxid, db, pctseqs, Total) %>%
    group_by(sampleID, taxid, pctseqs) %>%
    slice(1) %>%
    ungroup() %>%
    filter(pctseqs >= 1e-04) %>%
    group_by(sampleID) %>%
    dplyr::add_count(taxid, name = "totalSp") %>%
    mutate(sampleID_count = length(unique(sampleID)), spPres = totalSp/sampleID_count) %>%
    filter(spPres >= 0.1) %>%
    select(-c(Total, sampleID_count, spPres, totalSp)) %>%
    group_by(sampleID) %>%
    mutate(pctseqs = pctseqs/sum(pctseqs))


t_metaphlan_mat <- t_metaphlan %>%
    distinct() %>%
    pivot_wider(names_from = "taxid", values_from = "pctseqs",
        values_fill = 0) %>%
    column_to_rownames(var = "sampleID") %>%
    select(-db)


# taxUMAP microbiota table
taxumap_microbiota <- t_metaphlan_mat %>%
    rownames_to_column(var = "index_column") %>%
    pivot_longer(-index_column, names_to = "taxid", values_to = "pctseq") %>%
    mutate(taxid = paste0("taxID", taxid)) %>%
    pivot_wider(names_from = "taxid", values_from = "pctseq")

write.csv(taxumap_microbiota, "./Results/microbiota_table.csv",
    row.names = FALSE)

# taxUMAP taxonomy table
taxumap_taxonomy <- t_metaphlan_mat %>%
    rownames_to_column(var = "index_column") %>%
    pivot_longer(-index_column, names_to = "taxid", values_to = "pctseq") %>%
    left_join(tax_lookup %>%
        mutate(taxid = as.character(taxid))) %>%
    mutate(taxid = paste0("taxID", taxid)) %>%
    distinct(Kingdom, Phylum, Class, Order, Family, Genus, Species,
        taxid) %>%
    transmute(OTU = taxid, Kingdom, Phylum = if_else(Phylum ==
        "", "unclassified", Phylum), Class = if_else(Class ==
        "", "unclassified", Class), Order = if_else(Order ==
        "", "unclassified", Order), Family = if_else(Family ==
        "", "unclassified", Family), Genus = if_else(Genus ==
        "", "unclassified", Genus), Genus = gsub(" ", "_", Genus),
        Species = if_else(Species == "", "unclassified", Species),
        Species = gsub(" ", "_", Species))
write.csv(taxumap_taxonomy, "./Results/taxonomy.csv", row.names = FALSE)

TaxUMAP Analysis

import sys

print(sys.path)

# USER MUST SET THEIR PATH TO THEIR LOCALLY INSTALLED TAXUMAP LOCATION
## ['', '/usr/bin', '/Library/Developer/CommandLineTools/Library/Frameworks/Python3.framework/Versions/3.8/lib/python38.zip', '/Library/Developer/CommandLineTools/Library/Frameworks/Python3.framework/Versions/3.8/lib/python3.8', '/Library/Developer/CommandLineTools/Library/Frameworks/Python3.framework/Versions/3.8/lib/python3.8/lib-dynload', '/Users/nick/Library/Python/3.8/lib/python/site-packages', '/opt/anaconda3/bin/taxumap', '/Library/Developer/CommandLineTools/Library/Frameworks/Python3.framework/Versions/3.8/lib/python3.8/site-packages', '/Library/Frameworks/R.framework/Versions/4.2/Resources/library/reticulate/python']
sys.path.insert(0, '/Users/nick/Documents/taxumap/taxumap/')

from taxumap.taxumap_base import Taxumap

# From file
tu = Taxumap(taxonomy='./Results/taxonomy.csv', microbiota_data='./Results/microbiota_table.csv',random_state=456)

# Transform the data (an inplace function)
## Phylum Class
## not monophyletic
## Class Order
## not monophyletic
## Order Family
## not monophyletic
## Family Genus
## not monophyletic
## post validate inputs main                Kingdom  ...                          Species
## ASV                     ...                                 
## taxID817      Bacteria  ...             Bacteroides_fragilis
## taxID818      Bacteria  ...     Bacteroides_thetaiotaomicron
## taxID820      Bacteria  ...            Bacteroides_uniformis
## taxID821      Bacteria  ...             Phocaeicola_vulgatus
## taxID823      Bacteria  ...       Parabacteroides_distasonis
## ...                ...  ...                              ...
## taxID28447    Bacteria  ...        Clavibacter_michiganensis
## taxID33968    Bacteria  ...  Leuconostoc_pseudomesenteroides
## taxID709323   Bacteria  ...         Fructobacillus_tropaeoli
## taxID1070421  Bacteria  ...            Periweissella_fabalis
## taxID2749962  Bacteria  ...         Lactococcus_paracarnosus
## 
## [672 rows x 7 columns]
## 
## /opt/anaconda3/bin/taxumap/taxumap/dataloading.py:86: UserWarning: Reading taxonomy table. Assuming columns are ordered by phylogeny with in descending order of hierarchy:
##                  e.g. Kingdom, Phylum, ... , Genus, Species, etc.
##                  Additionally, the OTU or ASV column must be labeled as 'OTU' or 'ASV' unless otherwise specified
##   warnings.warn(
tu.transform_self(neigh=55)

# Raw embedding dataframe
## Taxumap(agg_levels = ['Phylum', 'Family'], weights = [1, 1])
## 
## /opt/anaconda3/bin/taxumap/taxumap/taxumap_base.py:91: UserWarning: Setting min_dist to 0.05/sum(weights)
##   warnings.warn("Setting min_dist to 0.05/sum(weights)")
## /opt/anaconda3/bin/taxumap/taxumap/taxumap_base.py:102: UserWarning: Setting epochs to 5000
##   warnings.warn("Setting epochs to %d" % epochs)
## /opt/anaconda3/bin/taxumap/taxumap/tools.py:47: UserWarning: aggregating on Phylum
##   warnings.warn("aggregating on %s" % agg_level)
## /opt/anaconda3/bin/taxumap/taxumap/tools.py:47: UserWarning: aggregating on Family
##   warnings.warn("aggregating on %s" % agg_level)
embedding_df = tu.df_embedding

Figure 1: Shotgun Metagenomics, taxUMAP and Diversity

#### Alpha Diversity ####
# Alpha diversity matrix: Inverse Simpson
alpha_invsim <- vegan::diversity(t_metaphlan_mat, index = "invsimpson") %>%
  as.data.frame() %>% 
  rownames_to_column(var = "sampleID") %>% 
  dplyr::rename("InvSimpson" = ".")

# Alpha Diversity matrix: Shannon
alpha_shannon <- vegan::diversity(t_metaphlan_mat, index = "shannon") %>%
  as.data.frame() %>% 
  rownames_to_column(var = "sampleID") %>% 
  dplyr::rename("Shannon" = ".")

# Alpha Diversity matrix: Observed ASVs
alpha_richness <- vegan::specnumber(t_metaphlan_mat) %>%
  as.data.frame() %>% 
  rownames_to_column(var = "sampleID") %>% 
  dplyr::rename("Richness" = ".")

#### taxUMAP #####
# Find most abundant taxa per sample and plot just that
top_tax <- t_metaphlan %>%
  group_by(sampleID) %>%
  slice_max(pctseqs, n = 1) %>%
  left_join(tax_lookup) %>% 
  mutate(across(everything(), ~ifelse(.=="", NA, as.character(.)))) %>% 
  replace_na(list(Species="unclassified",
                  Genus="unclassified",
                  Family="unclassified",
                  Order="unclassified",
                  Class="unclassified",
                  Phylum="unclassified")) %>%
  mutate(Genus=ifelse(Genus=="unclassified",
                      paste(Family,Genus,sep="\n"),
                      as.character(Genus)),
         pctseqs = as.numeric(pctseqs))


metaphlan_df2 <- t_metaphlan %>% 
  mutate(db = factor(db, levels = c("Liver Transplant", "Healthy Donor"))) %>% 
  left_join(tax_lookup) %>% 
  drop_na(taxid) %>% 
  arrange(Kingdom, Phylum, Class, Order, Family, Genus) %>%
  mutate(Genus = paste0(Phylum,"-",Order,"-", Family, "-",Genus)) %>% 
  left_join(alpha_shannon) %>%
  group_by(sampleID) %>% 
  arrange(Genus) %>% 
  mutate(cum.pct = cumsum(pctseqs), 
         y.text = (cum.pct + c(0, cum.pct[-length(cum.pct)]))/2) %>% 
  ungroup() %>%
  dplyr::select(-cum.pct)

metaphlan_df_sumry <- 
  metaphlan_df2 %>% 
  group_by(sampleID) %>% 
  dplyr::slice(1) %>% 
  ungroup() %>% 
  mutate(healthy_min_shannon = min(Shannon[db == "Healthy Donor"]),
         diversity_group = case_when(
           db == "Healthy Donor" ~ "Healthy Donor",
           Shannon >= healthy_min_shannon ~ "High Diversity",
           TRUE ~ "TBD"
         ),
         lt_med_shannon = median(Shannon[diversity_group == "TBD"], na.rm = TRUE),
         diversity_group = case_when(
           diversity_group %in% c("Healthy Donor", "High Diversity") ~ diversity_group,
           Shannon >= lt_med_shannon ~ "Medium Diversity",
           TRUE ~ "Low Diversity"
         ),
         diversity_group = factor(
           diversity_group,
           levels = c(
             "Low Diversity",
             "Medium Diversity",
             "High Diversity",
             "Healthy Donor"
           )
         ),
         diversity_group_abv = gsub(pattern = " Diversity",
                                    replacement = "",
                                    x = diversity_group),
         diversity_group_abv = factor(diversity_group_abv,
                                      levels = c("Low", "Medium", "High", "Healthy Donor"))) %>% 
  arrange(Genus)


tax_umap_mat_plot <- py$embedding_df %>% 
  as.data.frame() %>% 
  rownames_to_column(var = "sampleID") %>% 
  left_join(t_metaphlan %>% 
              distinct(sampleID, db)) %>%
  left_join(top_tax) %>% 
  ggplot(aes(x = taxumap1, y = taxumap2, color = Genus, shape = db, size = pctseqs*100, alpha = db))+
  geom_point(fill = "black")+
  theme_bw() +
  theme(panel.grid = eb(),
        axis.title = et(color = "black", size = 14),
        axis.text = et(color = "black", size = 12),
        legend.title = et(color = "black", size = 14),
        legend.text = et(color = "black", size = 12),
        legend.position = "top"
        ) + 
  xlab("taxUMAP1") +
  ylab("taxUMAP1") +
  guides(
    shape = guide_legend(
      title = "Cohort",
      override.aes = list(size = 4),
      order = 1,
      nrow = 2,
      title.position = "top",
      title.hjust = 0.5
    ),
    size = guide_legend(
      title = "Relative Abundance (%)",
      order = 2,
      nrow = 2,
      title.position = "top",
      title.hjust = 0.5
    ),
    color = "none",
    fill = "none",
    alpha = "none"
  )+
  scale_color_manual(values = metaphlan_pal)+
  scale_shape_manual(values = c(24, 16))+
  scale_alpha_manual(values = c(1, 0.45))

tax_umap_mat_plot

ggsave(plot = tax_umap_mat_plot, 
            filename = "./Results/Figure_1B.pdf",
            height = 8, width = 8)

#### Alpha Diversity Stats #####
# Obtain stats for alpha diversity
diversity_comps <- list(
  c("Healthy Donor", "High"),
  c("High", "Medium"),
  c("High", "Low"),
  c("Medium", "Low")
  )

alpha_stats <- alpha_invsim %>% 
  left_join(alpha_shannon) %>% 
  left_join(alpha_richness) %>% 
  inner_join(metaphlan_df2 %>%
               left_join(metaphlan_df_sumry %>% select(sampleID, db, diversity_group_abv)) %>% 
               distinct(sampleID, db, diversity_group_abv)
             ) %>% 
  pivot_longer(!c(sampleID, db, diversity_group_abv),names_to = "diversity_metric", values_to = "value") %>% 
  group_by(diversity_metric) %>% 
  rstatix::wilcox_test(value~diversity_group_abv,
                        comparisons = diversity_comps,
                       p.adjust.method = "BH",
                       alternative= "two.sided"
                       ) %>% 
  ungroup()

# Create dataframe for all phylogentic levels of interest
phylo_rel_abd <- t_metaphlan %>% 
  left_join(tax_lookup) %>% 
  inner_join(metaphlan_df2 %>%
               left_join(metaphlan_df_sumry %>% select(sampleID, db, diversity_group_abv)) %>% 
               distinct(sampleID, db, diversity_group_abv)
             ) %>% 
  mutate(Species = paste(Kingdom, Phylum, Class, Order, Family, Genus, Species, sep = "|")) %>% 
  filter(grepl(pattern = "Enterococcus|Enterobacterales|Bacteroidetes|Lachnospiraceae|Oscillospiraceae", x = Species)) %>% 
  mutate(organism = case_when(grepl(pattern = "Enterococcus", x = Species) ~ "Enterococcus",
                              grepl(pattern = "Enterobacterales", x = Species) ~ "Enterobacterales",
                              grepl(pattern = "Bacteroidetes", x = Species) ~ "Bacteroidetes",
                              grepl(pattern = "Lachnospiraceae", x = Species) ~ "Lachnospiraceae",
                              grepl(pattern = "Oscillospiraceae", x = Species) ~ "Oscillospiraceae")) %>% 
  select(sampleID, db, diversity_group_abv, organism, pctseqs) %>% 
  group_by(sampleID, db, diversity_group_abv, organism) %>% 
  summarise(pctseqs = sum(pctseqs)) %>% 
  ungroup() %>% 
  pivot_wider(names_from = organism, values_from = pctseqs, values_fill = 0) %>% 
  pivot_longer(!c(sampleID, db, diversity_group_abv), names_to = "organism", values_to = "pctseqs")

# Obtain stats for all phylogentic levels of interest
rel_abd_alpha_stats <- phylo_rel_abd %>%  
  group_by(organism) %>% 
  rstatix::wilcox_test(pctseqs~diversity_group_abv) %>% 
  bind_rows(alpha_stats) %>% 
  rstatix::adjust_pvalue(method = "BH") %>% 
  mutate(p.adj = ifelse(p.adj < 0.001, 0.001, round(p.adj, 3)))

write.csv(rel_abd_alpha_stats, "./Results/Figure_1_Statistics.csv", row.names = FALSE)

#### Plot Inverse Simpson ####
symnum.args <- list(cutpoints = c(0, 0.0001, 0.001, 0.01, 0.05, Inf), symbols = c("****", "***", "**", "*", "ns"))

diversity_group_colors <- c("#3A001E", "#8A0246", "#C20463", "#1A49BE")

set.seed(456)
gg_alpha_invsim <- alpha_invsim %>% 
  inner_join(t_metaphlan %>% 
               distinct(sampleID, db) %>% 
               select(sampleID, db)
             ) %>% 
  inner_join(metaphlan_df %>%
               left_join(metaphlan_df_sumry %>% select(sampleID, db, diversity_group, diversity_group_abv)) %>% 
               distinct(sampleID, db, diversity_group, diversity_group_abv)
             ) %>% 
  mutate(db = factor(db, levels = c("Liver Transplant", "Healthy Donor"))) %>% 
  ggplot(., aes(x = diversity_group_abv, 
                y = InvSimpson, 
                color = diversity_group_abv, 
                fill = db)) +
  geom_boxplot(outlier.colour = NA,
               alpha = 0.35) +
  geom_jitter(width = 0.2, size = 2.5, alpha = 0.65)+
  stat_compare_means(comparisons = diversity_comps,
                     tip.length = 0.01,
                     step.increase = 0.075,
                     symnum.args = symnum.args,
                     method.args = list(alternative = "two.sided",
                                        exact = FALSE)
                     ) +
  theme_bw() +
  theme(
    panel.grid = eb(),
    axis.title.y = et(size = 14, color = "black"),
    axis.title.x = eb(),
    axis.text = et(size = 12, color = "black"),
    plot.margin = margin(t = 5,  # Top margin
                         r = 5,  # Right margin
                         b = 5,  # Bottom margin
                         l = 5), # Left margin
    panel.border = eb(),
    axis.line = el(color = 'black')
  ) +
  ylab("Alpha Diversity\n(Inverse Simpson)") +
  scale_fill_manual(values = rev(pirate_colors)) +
  scale_color_manual(values = diversity_group_colors) +
  guides(fill = guide_legend("Cohort"),
         color = guide_legend("Diversity Group",
                              override.aes = aes(label = "")))+
  scale_y_continuous(breaks = seq(0,45,5),
                     expand = expansion(mult = c(0.01, 0.1))) +
  coord_cartesian(xlim = c(1.1,3.9))
## [1] FALSE
gg_alpha_invsim

pdf(file = "./Results/Figure_1C.pdf", height = 6, width = 7)
gg_alpha_invsim
invisible(dev.off())

#### Plot Shannon Diversity ####
set.seed(456)
gg_alpha_shannon <- alpha_shannon %>% 
  inner_join(t_metaphlan %>% 
               distinct(sampleID, db) %>% 
               select(sampleID, db)
             ) %>% 
  inner_join(metaphlan_df %>%
               left_join(metaphlan_df_sumry %>% select(sampleID, db, diversity_group, diversity_group_abv)) %>% 
               distinct(sampleID, db, diversity_group, diversity_group_abv)
             ) %>% 
  mutate(db = factor(db, levels = c("Liver Transplant", "Healthy Donor"))) %>% 
  ggplot(., aes(x = diversity_group_abv, 
                y = Shannon, 
                color = diversity_group_abv, 
                fill = db)) +
  geom_boxplot(outlier.colour = NA,
               alpha = 0.35) +
  geom_jitter(width = 0.2, size = 2.5, alpha = 0.65)+
  stat_compare_means(comparisons = diversity_comps,
                     tip.length = 0.01,
                     step.increase = 0.075,
                     symnum.args = symnum.args,
                     method.args = list(alternative = "two.sided",
                                        exact = FALSE)
                     ) + 
  theme_bw() +
  theme(
    panel.grid = eb(),
    axis.title.y = et(size = 14, color = "black"),
    axis.title.x = eb(),
    axis.text = et(size = 12, color = "black"),
    plot.margin = margin(t = 5,  # Top margin
                         r = 5,  # Right margin
                         b = 5,  # Bottom margin
                         l = 5), # Left margin
    panel.border = eb(),
    axis.line = el(color = 'black')
  ) +
  ylab("Alpha Diversity\n(Shannon)") +
  scale_fill_manual(values = rev(pirate_colors)) +
  scale_color_manual(values = diversity_group_colors) +
  guides(fill = guide_legend("Cohort"),
         color = guide_legend("Diversity Group",
                              override.aes = aes(label = "")))+
  scale_y_continuous(breaks = seq(0,6.5,1),
                     expand = expansion(mult = c(0.01, 0.1))) +
  coord_cartesian(xlim = c(1.1,3.9))
## [1] FALSE
gg_alpha_shannon

pdf(file = "./Results/Figure_1D.pdf", height = 6, width = 7)
gg_alpha_shannon
invisible(dev.off())

#### Plot Species Richness ####
set.seed(456)
gg_alpha_richness <- alpha_richness %>% 
  inner_join(t_metaphlan %>% 
               distinct(sampleID, db) %>% 
               select(sampleID, db)
             ) %>% 
  inner_join(metaphlan_df %>%
               left_join(metaphlan_df_sumry %>% select(sampleID, db, diversity_group, diversity_group_abv)) %>% 
               distinct(sampleID, db, diversity_group, diversity_group_abv)
             ) %>% 
  mutate(db = factor(db, levels = c("Liver Transplant", "Healthy Donor"))) %>% 
  ggplot(., aes(x = diversity_group_abv, 
                y = Richness, 
                color = diversity_group_abv, 
                fill = db)) +
  geom_boxplot(outlier.colour = NA,
               alpha = 0.35) +
  geom_jitter(width = 0.2, size = 2.5, alpha = 0.65)+
  stat_compare_means(comparisons = diversity_comps,
                     tip.length = 0.01,
                     label.y = c(145, 160, 150, 110),
                     symnum.args = symnum.args,
                     method.args = list(alternative = "two.sided",
                                        exact = FALSE)
                     ) +
  theme_bw() +
  theme(
    panel.grid = eb(),
    axis.title.y = et(size = 14, color = "black"),
    axis.title.x = eb(),
    axis.text = et(size = 12, color = "black"),
    plot.margin = margin(t = 5,  # Top margin
                         r = 5,  # Right margin
                         b = 5,  # Bottom margin
                         l = 5), # Left margin
    panel.border = eb(),
    axis.line = el(color = 'black')
  ) +
  ylab("Alpha Diversity\n(Richness)") +
  scale_fill_manual(values = rev(pirate_colors)) +
  scale_color_manual(values = diversity_group_colors) +
  guides(fill = guide_legend("Cohort"),
         color = guide_legend("Diversity Group",
                              override.aes = aes(label = "")))+
  scale_y_continuous(breaks = seq(0,180,50),
                     expand = expansion(mult = c(0.01, 0.035))) +
  coord_cartesian(xlim = c(1.1,3.9))
## [1] FALSE
gg_alpha_richness

pdf(file = "./Results/Figure_1E.pdf", height = 6, width = 7)
gg_alpha_richness
invisible(dev.off())

#### Plot Lachnospiraceae ####
set.seed(456)
gg_lach_rel_abd <- phylo_rel_abd %>% 
  filter(organism == "Lachnospiraceae") %>% 
  group_by(organism) %>% 
  mutate(db = factor(db, levels = c("Liver Transplant", "Healthy Donor"))) %>% 
  ggplot(., aes(x = diversity_group_abv, 
                y = pctseqs, 
                color = diversity_group_abv, 
                fill = db)) +
  geom_boxplot(outlier.colour = NA,
               alpha = 0.35) +
  geom_jitter(width = 0.2, size = 2.5, alpha = 0.65)+
  stat_compare_means(comparisons = diversity_comps,
                     tip.length = 0.01,
                     symnum.args = symnum.args,
                     method.args = list(alternative = "two.sided",
                                        exact = FALSE),
                     label.y = c(0.65, 0.71, 0.76, 0.82)
                     ) +
  theme_bw() +
  theme(
    panel.grid = eb(),
    axis.title.y = et(size = 14, color = "black"),
    axis.title.x = eb(),
    axis.text = et(size = 12, color = "black"),
    plot.margin = margin(t = 5,  # Top margin
                         r = 5,  # Right margin
                         b = 5,  # Bottom margin
                         l = 5), # Left margin
    panel.border = eb(),
    axis.line = el(color = 'black')
  ) +
  ylab(~atop(paste(italic("Lachnospiraceae")), paste("MetaPhlAn4 Relative Abundance"))) +
  scale_fill_manual(values = rev(pirate_colors)) +
  scale_color_manual(values = diversity_group_colors) +
  guides(fill = guide_legend("Cohort"),
         color = guide_legend("Diversity Group",
                              override.aes = aes(label = "")))+
  scale_y_continuous(breaks = seq(0,1,0.1),
                     expand = expansion(mult = c(0.01, 0.035)),
                     labels = scales::percent_format(accuracy = 1)) +
  coord_cartesian(xlim = c(1.1,3.9))
## [1] FALSE
gg_lach_rel_abd

pdf(file = "./Results/Figure_1F.pdf", height = 6, width = 7)
gg_lach_rel_abd
invisible(dev.off())

#### Plot Enterococcus ####
set.seed(456)
gg_ecoc_rel_abd <- phylo_rel_abd %>% 
  filter(organism == "Enterococcus") %>% 
  group_by(organism) %>% 
  mutate(db = factor(db, levels = c("Liver Transplant", "Healthy Donor"))) %>% 
  ggplot(., aes(x = diversity_group_abv, 
                y = pctseqs, 
                color = diversity_group_abv, 
                fill = db)) +
  geom_boxplot(outlier.colour = NA,
               alpha = 0.35) +
  geom_jitter(width = 0.2, size = 2.5, alpha = 0.65)+
  stat_compare_means(comparisons = diversity_comps,
                     tip.length = 0.01,
                     symnum.args = symnum.args,
                     method.args = list(alternative = "two.sided",
                                        exact = FALSE),
                    
                     label.y = c(0.20, 0.75, 0.985, 1.05)
                     ) +
  theme_bw() +
  theme(
    panel.grid = eb(),
    axis.title.y = et(size = 14, color = "black"),
    axis.title.x = eb(),
    axis.text = et(size = 12, color = "black"),
    plot.margin = margin(t = 5,  # Top margin
                         r = 5,  # Right margin
                         b = 5,  # Bottom margin
                         l = 5), # Left margin
    panel.border = eb(),
    axis.line = el(color = 'black')
  ) +
  ylab(~atop(paste(italic("Enterococcus")), paste("MetaPhlAn4 Relative Abundance"))) +
  scale_fill_manual(values = rev(pirate_colors)) +
  scale_color_manual(values = diversity_group_colors) +
  guides(fill = guide_legend("Cohort"),
         color = guide_legend("Diversity Group",
                              override.aes = aes(label = "")))+
  scale_y_continuous(breaks = seq(0,1,0.1),
                     expand = expansion(mult = c(0.01, 0.035)),
                     labels = scales::percent_format(accuracy = 1)) +
  coord_cartesian(xlim = c(1.1,3.9))
## [1] FALSE
gg_ecoc_rel_abd

pdf(file = "./Results/Figure_1G.pdf", height = 6, width = 7)
gg_ecoc_rel_abd
invisible(dev.off())

#### Plot Enterobacterales #####
set.seed(456)
gg_ebac_rel_abd <- phylo_rel_abd %>% 
  filter(organism == "Enterobacterales") %>% 
  group_by(organism) %>% 
  mutate(db = factor(db, levels = c("Liver Transplant", "Healthy Donor"))) %>% 
  ggplot(., aes(x = diversity_group_abv, 
                y = pctseqs, 
                color = diversity_group_abv, 
                fill = db)) +
  geom_boxplot(outlier.colour = NA,
               alpha = 0.35) +
  geom_jitter(width = 0.2, size = 2.5, alpha = 0.65)+
  stat_compare_means(comparisons = diversity_comps,
                     tip.length = 0.01,
                     symnum.args = symnum.args,
                     method.args = list(alternative = "two.sided",
                                        exact = FALSE),
                     label.y = c(0.20, 0.75, 0.985, 1.05)
                     ) +
  theme_bw() +
  theme(
    panel.grid = eb(),
    axis.title.y = et(size = 14, color = "black"),
    axis.title.x = eb(),
    axis.text = et(size = 12, color = "black"),
    plot.margin = margin(t = 5,  # Top margin
                         r = 5,  # Right margin
                         b = 5,  # Bottom margin
                         l = 5), # Left margin
    panel.border = eb(),
    axis.line = el(color = 'black')
  ) +
  ylab(~atop(paste(italic("Enterobacterales")), paste("MetaPhlAn4 Relative Abundance"))) +
  scale_fill_manual(values = rev(pirate_colors)) +
  scale_color_manual(values = diversity_group_colors) +
  guides(fill = guide_legend("Cohort"),
         color = guide_legend("Diversity Group",
                              override.aes = aes(label = "")))+
  scale_y_continuous(breaks = seq(0,1,0.1),
                     expand = expansion(mult = c(0.01, 0.035)),
                     labels = scales::percent_format(accuracy = 1)) +
  coord_cartesian(xlim = c(1.1,3.9))
## [1] FALSE
gg_ebac_rel_abd

pdf(file = "./Results/Figure_1H.pdf", height = 6, width = 7)
gg_ebac_rel_abd
invisible(dev.off())

#### Plot Bacteroidetes ####
set.seed(456)
gg_bact_rel_abd <- phylo_rel_abd %>% 
  filter(organism == "Bacteroidetes") %>% 
  group_by(organism) %>% 
  mutate(db = factor(db, levels = c("Liver Transplant", "Healthy Donor"))) %>% 
  ggplot(., aes(x = diversity_group_abv, 
                y = pctseqs, 
                color = diversity_group_abv, 
                fill = db)) +
  geom_boxplot(outlier.colour = NA,
               alpha = 0.35) +
  geom_jitter(width = 0.2, size = 2.5, alpha = 0.65)+
  stat_compare_means(comparisons = diversity_comps,
                     tip.length = 0.01,
                     symnum.args = symnum.args,
                     method.args = list(alternative = "two.sided",
                                        exact = FALSE),
                     label.y = c(0.65, 0.98, 0.92, 1.02)
                     ) + 
  theme_bw() +
  theme(
    panel.grid = eb(),
    axis.title.y = et(size = 14, color = "black"),
    axis.title.x = eb(),
    axis.text = et(size = 12, color = "black"),
    plot.margin = margin(t = 5,  # Top margin
                         r = 5,  # Right margin
                         b = 5,  # Bottom margin
                         l = 5), # Left margin
    panel.border = eb(),
    axis.line = el(color = 'black')
  ) +
  ylab(~atop(paste(italic("Bacteroidetes")), paste("MetaPhlAn4 Relative Abundance"))) +
  scale_fill_manual(values = rev(pirate_colors)) +
  scale_color_manual(values = diversity_group_colors) +
  guides(fill = guide_legend("Cohort"),
         color = guide_legend("Diversity Group",
                              override.aes = aes(label = "")))+
  scale_y_continuous(breaks = seq(0,1,0.1),
                     expand = expansion(mult = c(0.01, 0.035)),
                     labels = scales::percent_format(accuracy = 1)) +
  coord_cartesian(xlim = c(1.1,3.9))
## [1] FALSE
gg_bact_rel_abd

pdf(file = "./Results/Figure_1I.pdf", height = 6, width = 7)
gg_bact_rel_abd
invisible(dev.off())

#### Plot Oscillospiraceae ####
set.seed(456)
gg_oscl_rel_abd <- phylo_rel_abd %>% 
  filter(organism == "Oscillospiraceae") %>% 
  group_by(organism) %>% 
  mutate(db = factor(db, levels = c("Liver Transplant", "Healthy Donor"))) %>% 
  ggplot(., aes(x = diversity_group_abv, 
                y = pctseqs, 
                color = diversity_group_abv, 
                fill = db)) +
  geom_boxplot(outlier.colour = NA,
               alpha = 0.35) +
  geom_jitter(width = 0.2, size = 2.5, alpha = 0.65)+
  stat_compare_means(comparisons = diversity_comps,
                     tip.length = 0.01,
                     symnum.args = symnum.args,
                     method.args = list(alternative = "two.sided",
                                        exact = FALSE),
                     label.y = c(0.33, 0.52, 0.59, 0.66)
                     ) +
  theme_bw() +
  theme(
    panel.grid = eb(),
    axis.title.y = et(size = 14, color = "black"),
    axis.title.x = eb(),
    axis.text = et(size = 12, color = "black"),
    plot.margin = margin(t = 5,  # Top margin
                         r = 5,  # Right margin
                         b = 5,  # Bottom margin
                         l = 5), # Left margin
    panel.border = eb(),
    axis.line = el(color = 'black')
  ) +
  ylab(~atop(paste(italic("Oscillospiraceae")), paste("MetaPhlAn4 Relative Abundance"))) +
   scale_fill_manual(values = rev(pirate_colors)) +
  scale_color_manual(values = diversity_group_colors) +
  guides(fill = guide_legend("Cohort"),
         color = guide_legend("Diversity Group",
                              override.aes = aes(label = "")))+
  scale_y_continuous(breaks = seq(0,1,0.1),
                     expand = expansion(mult = c(0.01, 0.035)),
                     labels = scales::percent_format(accuracy = 1)) +
  coord_cartesian(xlim = c(1.1,3.9))
## [1] FALSE
gg_oscl_rel_abd

pdf(file = "./Results/Figure_1J.pdf", height = 6, width = 7)
gg_oscl_rel_abd
invisible(dev.off())

#### Plot Metaphlan Relative Abundance ####
# Figure 1A-MetaPhlAn4 Taxonomy
# Load legend
tax_legend <- magick::image_read_pdf("./Data/legend.v2.pdf")

gg_tax_legend <- cowplot::ggdraw() + cowplot::draw_image(tax_legend)

metaphlan_pal2 <- getRdpPal(metaphlan_df2)

gg_metaphlan <- metaphlan_df2 %>%
  left_join(metaphlan_df_sumry %>% select(sampleID, diversity_group)) %>% 
  ungroup() %>% 
  mutate(Genus = factor(Genus, levels = unique(Genus))) %>% 
  group_by(sampleID) %>% 
  arrange(Genus) %>% 
  ggplot(aes(x=reorder(sampleID, Shannon),y=pctseqs)) +
  geom_bar(stat="identity",aes(fill=Genus), width = 0.9) +
  scale_fill_manual(values = metaphlan_pal2) +
  theme_bw() +
  theme(legend.position = "none",
        axis.text.x=eb(),
        axis.ticks.x=eb(),
        strip.text.x= et(angle=0,size=14),
        strip.background = eb(),
        axis.title.y = et(color = "black", size = 14),
        axis.text.y = et(color = "black", size = 12),
        panel.spacing = unit(0.5, "lines"),
        plot.margin = margin(t = 5,
                             r = 5, 
                             b = 0, 
                             l = 5)) +
  facet_grid(. ~diversity_group, scales = "free", space = "free")+
  scale_y_continuous(expand = expansion(mult = c(0.005,0.005)),
                     labels = scales::percent_format(accuracy = 1)) +
  ylab("MetaPhlAn4 Relative Abundance") +
  xlab("")


# Color facets
gg_metaphlan_grob <- ggplot_gtable(ggplot_build(gg_metaphlan))
strip_both <- which(grepl('strip-', gg_metaphlan_grob$layout$name))
fills <- diversity_group_colors
k <- 1
for (i in strip_both) {
  l <- which(grepl('titleGrob', gg_metaphlan_grob$grobs[[i]]$grobs[[1]]$childrenOrder))
  gg_metaphlan_grob$grobs[[i]]$grobs[[1]]$children[[l]]$children[[1]]$gp$col <- fills[k]
  k <- k+1
}

gg_shannon <- metaphlan_df_sumry %>% 
  ggplot(aes(x=reorder(sampleID, Shannon), y = Shannon)) +
  geom_bar(stat="identity",aes(fill=diversity_group_abv), width = 0.9) +
  theme_bw() +
  theme(legend.position = "none",
        axis.text.x = eb(),
        axis.title.x = eb(),
        axis.ticks.x = eb(),
        strip.text = eb(),
        strip.background = er(fill = "white"),
        axis.title.y = et(color = "black", size = 14),
        axis.text.y = et(color = "black", size = 12),
        panel.spacing = unit(0.5, "lines"),
        plot.margin = margin(t = 0,
                             r = 5, 
                             b = 0, 
                             l = 5),
        panel.grid = eb()) +
  scale_fill_manual(values = diversity_group_colors) +
  facet_grid(. ~diversity_group, scales = "free", space = "free")

gg_metaphlan_shannon <-
plot_grid(
  gg_metaphlan_grob,
  gg_shannon,
  axis = "lb",
  align = "hv",
  nrow = 2,
  rel_heights = c(1, 0.15)
)

pdf(file = "./Results/Figure_1A.pdf", width = 12.25, height = 8)
gg_metaphlan_shannon 
invisible(dev.off())

#### Combine all into a single figure 1 Start ####
alpha_org_plot <- plot_grid(
  gg_alpha_invsim + theme(legend.position = "none", 
                          axis.text.x = eb(), 
                          axis.title.x = eb(),
                          plot.margin = unit(c(0.1, 0, 0.15, 0), "cm")),
  gg_alpha_shannon + theme(legend.position = "none", 
                           axis.text.x = eb(), 
                           axis.title.x = eb(),
                           plot.margin = unit(c(0.1, 0, 0.15, 0), "cm")),
  gg_alpha_richness+ theme(legend.position = "none", 
                           axis.text.x = eb(),
                           axis.title.x = eb(),
                           plot.margin = unit(c(0.1, 0, 0.15, 0), "cm")),
  gg_lach_rel_abd + theme(axis.text.x = eb(),
                          axis.title.x = eb(),
                          plot.margin = unit(c(0.1, 0, 0.15, 0), "cm")),
  gg_ecoc_rel_abd + theme(legend.position = "none", 
                          axis.text.x = et(angle = 45, hjust = 0.85, vjust = 0.85), 
                          plot.margin = unit(c(0.05, 0, 0, 0), "cm")),
  gg_ebac_rel_abd + theme(legend.position = "none", 
                          axis.text.x = et(angle = 45, hjust = 0.85, vjust = 0.85), 
                          plot.margin = unit(c(0.05, 0, 0, 0), "cm")),
  gg_bact_rel_abd + theme(legend.position = "none", 
                          axis.text.x = et(angle = 45, hjust = 0.85, vjust = 0.85), 
                          plot.margin = unit(c(0.05, 0, 0, 0), "cm")),
  gg_oscl_rel_abd + theme(axis.text.x = et(angle = 45, hjust = 0.85, vjust = 0.85), 
                          plot.margin = unit(c(0.05, 0, 0, 0), "cm")),
  nrow = 2,
  axis = "lb",
  align = "v",
  rel_widths = c(1, 1, 1, 1.3),
  rel_heights = c(1,1.2)
)

# This is useful to figure out the general layout of the plots
gs <- lapply(1:4, function(ii)
  grobTree(rectGrob(gp=gpar(fill=ii, alpha=0.5)), textGrob(ii)))

grid.arrange(grobs=gs, ncol=4,
               top="top label", bottom="bottom\nlabel",
               left="left label", right="right label")

lay <- rbind(c(1,1,1,1,1,3,3,2),
             c(1,1,1,1,1,3,3,2),
             c(4,4,4,4,4,4,NA,NA),
             c(4,4,4,4,4,4,4,4),
             c(4,4,4,4,4,4,4,4))

grid.arrange(grobs = gs, layout_matrix = lay)

pdf(file = "./Results/Figure_1.pdf", height = 16, width = 20)

grid.arrange(
  gg_metaphlan_shannon,                        # 1
  gg_tax_legend,                               # 3
  tax_umap_mat_plot + 
    theme(
      legend.text = et(size = 10),
      legend.title = et(size = 12),
      plot.margin = margin(
        t = 5,  # Top margin
        r = 0,  # Right margin
        b = 75, # Bottom margin
        l = 5   # Left margin
      )
    ),                                      # 2
  alpha_org_plot,                           # 4
  layout_matrix = lay
)

invisible(dev.off())


grid.arrange(
  gg_metaphlan_shannon,                        # 1
  gg_tax_legend,                               # 2
  tax_umap_mat_plot + 
    theme(
      legend.text = et(size = 10),
      legend.title = et(size = 12),
      plot.margin = margin(
         t = 5,  # Top margin
        r = 0,  # Right margin
        b = 75, # Bottom margin
        l = 5   # Left margin
      )
    ),                                      # 3
  alpha_org_plot,                           # 4
  layout_matrix = lay
)

#### Combine all into a single figure 1 End ####

Figure 2: Qualitative Metabolomics Heatmap

heatmap_data <- 
  t_metaphlan %>% 
  drop_na(taxid) %>% 
  select(sampleID) %>% 
  group_by(sampleID) %>% 
  dplyr::slice(1) %>% 
  mutate(db = ifelse(grepl(sampleID, pattern = "lt"), "Liver Transplant", "Healthy Donor")) %>%
  left_join(metab_qual_anon) %>% 
  mutate(compound = ifelse(compound == "isovaleric-acid", "isovalerate", compound),
         compound = str_to_title(compound),
         compound = recode(compound,
                          Preq1 = "PreQ1")) %>% 
  filter(compound %in% heatmap_cmpds$compound|is.na(compound)) %>% 
  group_by(compound) %>% 
  mutate(median_val = ifelse(median(mvalue, na.rm = TRUE) == 0, min(mvalue[mvalue > 0], na.rm = TRUE)/10, median(mvalue, na.rm = TRUE)),
         heatmap_val = ifelse(log(mvalue/ median_val, base = 2) == -Inf, 0, log(mvalue/ median_val, base = 2))) %>% 
  ungroup() %>% 
  select(-c(mvalue, median_val)) %>% 
  group_by(sampleID, compound) %>% 
  slice_max(heatmap_val, with_ties = F, n = 1) %>% 
  ungroup() %>%
  select(-db) %>% 
  left_join(metaphlan_df2 %>%
               left_join(metaphlan_df_sumry %>% select(sampleID, db, diversity_group_abv)) %>%
               distinct(sampleID, db, diversity_group_abv), by = "sampleID") %>% 
  group_by(sampleID, compound) %>% 
  slice_max(heatmap_val, with_ties = F, n = 1) %>% 
  left_join(alpha_shannon) %>% 
  group_by(db) %>% 
  arrange(db, Shannon) %>% 
  ungroup()

# Stats
heatmap_pvals <-
  heatmap_data %>%
  filter(diversity_group_abv != "Healthy Donor") %>% 
  drop_na(compound) %>%
  group_by(compound) %>%
  rstatix::kruskal_test(heatmap_val~diversity_group_abv) %>%
  rstatix::adjust_pvalue(method = "BH") %>%
  select(compound, statistic, p, p.adj) 

heatmap_labels <- 
  heatmap_data %>% 
  mutate(compound = str_to_title(compound)) %>% 
  left_join(heatmap_lookup) %>%
  left_join(heatmap_pvals %>% group_by(compound, p.adj, p) %>% slice(1)) %>%
  arrange(class, subclass, p) %>%
  arrange(class, subclass) %>% 
  pivot_wider(c(diversity_group_abv, sampleID, db, Shannon), names_from = "compound", values_from = "heatmap_val", values_fn = mean) %>% 
  group_by(db) %>% 
  arrange(db, Shannon) %>% 
  ungroup() %>% 
  distinct(sampleID, .keep_all = TRUE) %>% 
  select(diversity_group_abv) %>% 
  mutate(
    diversity_group_abv = as.character(diversity_group_abv),
    diversity_group_abv = ifelse(
      grepl(pattern = "Healthy", as.character(diversity_group_abv)),
      diversity_group_abv,
      paste(diversity_group_abv, "Diversity")
    ),
    diversity_group_abv = factor(diversity_group_abv, levels = c("Low Diversity", "Medium Diversity",
                                                                 "High Diversity", "Healthy Donor"))
  )
  
# Build heatmap compound order (row order) and compound class (row slice)
heatmap_order <- 
  heatmap_data %>% 
  ungroup() %>% 
  filter(compound %in% heatmap_cmpds$compound) %>% 
  distinct(compound) %>% 
  drop_na() %>% 
  left_join(heatmap_lookup) %>% 
  left_join(heatmap_pvals %>% group_by(compound, p.adj, p) %>% slice(1)) %>%
  ungroup() %>% 
  mutate(class = factor(
    class,
    levels = c(
      "Fatty Acid",                   # 1
      "Dicarboxylic Acid",            # 2
      "Amino Acid",                   # 3
      "Bile Acid",                    # 4
      "Indole",                       # 5
      "Phenolic Aromatic",            # 6
      "Kynurine Pathway",             # 7
      "Vitamin"                       # 8
    )
  ),
  subclass = factor(
    subclass,
    levels = c(
      "Short-Chain Fatty Acid",       # 1
      "Branched-Chain Fatty Acid",    # 2
      "Aminated Fatty Acid",          # 3
      "Long-Chain Fatty Acid",        # 4
      "Dicarboxylic Acid",            # 5
      "Amino Acid",                   # 6
      "Primary Bile Acid",            # 7
      "Secondary Bile Acid",          # 8
      "Conjugated Bile Acid",         # 9
      "Indole",                       # 10
      "Phenolic Aromatic",            # 11
      "Kynurine Pathway",             # 12
      "Vitamin"                       # 13
    )
  )) %>% 
  arrange(class, subclass, p, compound) %>%
  select(class, subclass, p, compound)
  
# Build heatmap patient order following same order as gg_metaphlan plot
heatmap_column_order <- heatmap_data %>% 
  group_by(patientID) %>% 
  dplyr::slice(1) %>% 
  left_join(alpha_shannon) %>% 
  group_by(db) %>% 
  arrange(db, Shannon) %>%  
  select(patientID) %>% 
  distinct(patientID) %>% 
  pull(patientID)

# P-value legend color
pvalue_col_fun = colorRamp2(c(0, 0.045), c("#75C236", "#E3E4E6"))

# Create heatmap for adjusted p-values
pvalue_adj <-
heatmap_pvals %>% 
  group_by(compound, p.adj, p) %>% 
  dplyr::slice(1) %>% 
  ungroup() %>% 
  left_join(heatmap_lookup) %>%
  mutate(class = factor(
    class,
    levels = c(
      "Fatty Acid",                   # 1
      "Dicarboxylic Acid",            # 2
      "Amino Acid",                   # 3
      "Bile Acid",                    # 4
      "Indole",                       # 5
      "Phenolic Aromatic",            # 6
      "Kynurine Pathway",             # 7
      "Vitamin"                       # 8
    )
  ),
  subclass = factor(
    subclass,
    levels = c(
      "Short-Chain Fatty Acid",       # 1
      "Branched-Chain Fatty Acid",    # 2
      "Aminated Fatty Acid",          # 3
      "Long-Chain Fatty Acid",        # 4
      "Dicarboxylic Acid",            # 5
      "Amino Acid",                   # 6
      "Primary Bile Acid",            # 7
      "Secondary Bile Acid",          # 8
      "Conjugated Bile Acid",         # 9
      "Indole",                       # 10
      "Phenolic Aromatic",            # 11
      "Kynurine Pathway",             # 12
      "Vitamin"                       # 13
    )
  )) %>% 
  arrange(class, subclass, p.adj, compound) %>%
  select(class, subclass, p.adj, compound) %>% 
  column_to_rownames(var = "compound") %>% 
  select(`Adjusted p-value` = p.adj) %>% 
  as.matrix() %>% 
Heatmap(name = "Significance (adjusted p-value)",
        cluster_rows = FALSE,
        cluster_columns = FALSE,
        col = pvalue_col_fun,        
        column_title = "KW Test",
        column_title_side = "bottom",
        column_title_rot = 90,
        show_column_names = FALSE,
        column_names_side = "top",
        row_names_gp = gpar(fontsize = 8),
        rect_gp = gpar(col = "black", lwd = 0.2),
        row_gap = unit(3.5, "mm"),
        row_names_side = c("right"),
        row_order = heatmap_order$compound,
        row_split = heatmap_order$subclass,
        show_row_names = FALSE,
        row_title_rot = 0,
        heatmap_legend_param = list(at = seq(0.05, 0, -0.01)),
        width = unit(0.1, "in")
)

# Heatmap legend color
col_fun <- colorRamp2(breaks = c(-5, 0, 5), colors = c("#00aaad", "white", "#ad003a"))

# Global parameter for annotation 
# ht_opt$COLUMN_ANNO_PADDING <- unit(2.5, "mm")

gg_metab_heatmap <-
  heatmap_data %>% 
  mutate(compound = str_to_title(compound),
         compound = recode(compound,
                          Preq1 = "PreQ1")) %>%
  left_join(heatmap_lookup) %>% 
  left_join(heatmap_pvals %>% group_by(compound, p.adj, p) %>% slice(1) %>% select(compound, p.adj, p), by = "compound") %>%
  mutate(class = factor(
    class,
    levels = c(
      "Fatty Acid",                   # 1
      "Dicarboxylic Acid",            # 2
      "Amino Acid",                   # 3
      "Bile Acid",                    # 4
      "Indole",                       # 5
      "Phenolic Aromatic",            # 6
      "Kynurine Pathway",             # 7
      "Vitamin"                       # 8
    )
  ),
  subclass = factor(
    subclass,
    levels = c(
      "Short-Chain Fatty Acid",       # 1
      "Branched-Chain Fatty Acid",    # 2
      "Aminated Fatty Acid",          # 3
      "Long-Chain Fatty Acid",        # 4
      "Dicarboxylic Acid",            # 5
      "Amino Acid",                   # 6
      "Primary Bile Acid",            # 7
      "Secondary Bile Acid",          # 8
      "Conjugated Bile Acid",         # 9
      "Indole",                       # 10
      "Phenolic Aromatic",            # 11
      "Kynurine Pathway",             # 12
      "Vitamin"                       # 13
    )
  )) %>%
  mutate(
    diversity_group_abv = as.character(diversity_group_abv),
    diversity_group_abv = ifelse(
      grepl(pattern = "Healthy", as.character(diversity_group_abv)),
      diversity_group_abv,
      paste(diversity_group_abv, "Diversity")
    ),
    diversity_group_abv = factor(diversity_group_abv, levels = c("Low Diversity", "Medium Diversity",
                                                                 "High Diversity", "Healthy Donor"))
  ) %>% 
  arrange(class, subclass, p.adj, compound) %>%
  pivot_wider(c(diversity_group_abv, db, patientID, Shannon), names_from = compound, values_from = heatmap_val) %>%
  group_by(patientID) %>% 
  arrange(patientID) %>% 
  ungroup() %>% 
  select(-`NA`) %>%
  distinct(patientID, .keep_all = TRUE) %>%
  group_by(diversity_group_abv) %>% 
  arrange(db, Shannon) %>% 
  ungroup() %>% 
  select(-Shannon, -db, -diversity_group_abv) %>% 
  column_to_rownames("patientID") %>%
  as.matrix() %>% 
  t() %>%
  Heatmap(
    name = "Fold Change (log2)",
    col = col_fun,
    na_col = "grey83",
    rect_gp = gpar(col = "grey40", lwd = 1.5),
    column_names_gp = grid::gpar(fontsize = 8),
    column_gap = unit(2.5, "mm"),
    column_split = heatmap_labels,
    column_order = heatmap_column_order,
    column_title_gp = gpar(fontsize = 14),
    column_title_rot = 0,
    cluster_columns = FALSE,
    show_column_names = TRUE,
    show_column_dend = FALSE,
    row_names_gp = gpar(fontsize = 8),
    row_gap = unit(3.5, "mm"),
    row_names_side = c("left"),
    row_order = heatmap_order$compound,
    row_split = heatmap_order$subclass,
    row_title_rot = 0,
    cluster_rows = FALSE,
    show_row_dend = FALSE,
    heatmap_height = unit(13, "in"),
    heatmap_width =  unit(19, "in")
  )

gg_metab_heatmap_tot <- gg_metab_heatmap + pvalue_adj
gg_metab_heatmap_tot

pdf(file = "./Results/Figure_2.pdf", height = 15, width = 26, onefile = F)
gg_metab_heatmap_tot
invisible(dev.off())

Figure 3: Quant Metabolomics-Healthy Donor vs LT Patients

# Conversions of compounds
metab_conversions <- data.frame(
  compound = c(
    "Taurocholic Acid",
    "Glycocholic Acid",
    "Cholic Acid",
    "3-Oxolithocholic Acid",
    "Alloisolithocholic Acid",
    "Deoxycholic Acid",
    "Isodeoxycholic Acid",
    "Lithocholic Acid",
    "Kynurenic Acid",
    "Kynurenine",
    "Anthranilic Acid",
    "Desaminotyrosine",
    "Niacin",
    "Tyrosine",
    "Tryptamine",
    "Tryptophan",
    "Phenylalanine",
    "Acetate",
    "Butyrate",
    "Propionate",
    "Succinate"
  ),
  molar_mass__gmol = c(
    515.7,
    465.6,
    408.6,
    374.6,
    376.6,
    392.6,
    392.6,
    376.6,
    189.17,
    208.21,
    137.14,
    166.17,
    123.11,
    181.19,
    160.22,
    204.22,
    165.19,
    59.04,
    87.1,
    73.07,
    116.07
  )
)

metab_quant_converted <- metab_quant_anon %>%
  mutate(
    compound = ifelse(compound == "isovaleric-acid", "isovalerate", compound),
    compound = str_to_title(compound)
  ) %>%
  mutate(units = case_when(compound %in% c(
    "Taurocholic Acid",
    "Glycocholic Acid",
    "Cholic Acid",
    "3-Oxolithocholic Acid",
    "Alloisolithocholic Acid",
    "Deoxycholic Acid",
    "Isodeoxycholic Acid",
    "Lithocholic Acid"
  ) ~ "ugmL",
  compound %in% c("Acetate", "Butyrate", "Succinate", "Propionate") ~ "mM",
  TRUE ~ "uM")) %>% 
  right_join(metab_conversions, by = "compound") %>% # right_join to only keep compounds we're interested in
  mutate(mvalue__mM = case_when(units == "ugmL" ~ (mvalue*               #ugmL
                                                  ((1000/                #1000 ml per L
                                                    1000000)/            #1000000 ug per gram
                                                    molar_mass__gmol)*   #gram per mol (molar mass)
                                                    1000                 #1000 mM per 1M                                            
                                                   ),
                                units == "uM" ~ mvalue/                  #uM
                                                1000,                    #1000uM per 1M
                                TRUE ~ mvalue #units are already in mM
                                )
         ) %>% 
  select(sampleID, compound, mvalue__mM)
  

metab_boxplot <- 
  metaphlan_df2 %>% 
  left_join(metaphlan_df_sumry) %>% 
  drop_na(taxid) %>% 
  select(sampleID, diversity_group_abv, db) %>% 
  group_by(sampleID) %>% 
  dplyr::slice(1) %>% 
  left_join(metab_quant_converted) %>% 
  mutate(compound = ifelse(compound == "isovaleric-acid", "isovalerate", compound),
         compound = str_to_title(compound)) %>% 
  ungroup() %>%
  mutate(class = case_when(compound %in% c("Taurocholic Acid", "Glycocholic Acid") ~ "Conjugated Primary Bile Acid",
                           compound %in% c("Cholic Acid") ~ "Primary Bile Acid",
                           compound %in% c("3-Oxolithocholic Acid", "Alloisolithocholic Acid", "Deoxycholic Acid", "Isodeoxycholic Acid",
                                                "Lithocholic Acid") ~ "Secondary Bile Acid",
                           compound %in% c("Threonine", "Glycine", "Tyrosine", "Tyramine", "Serine", "Leucine", "Isoleucine",
                                               "Valine", "Phenylalanine", "Alanine", "Proline", "Aspartate", 
                                               "Methionine", "Glutamate", "Lysine", "Cysteine", "Tryptophan") ~ "Amino Acid",
                           compound %in% c("Acetate", "Butyrate", "Succinate", "Propionate") ~ "Short-Chain Fatty Acid",
                           compound %in% c("Kynurenic Acid", "Anthranilic Acid", "Kynurenine", "Tryptamine") ~ "Kynurenine Metabolite",
                           compound == "Desaminotyrosine" ~ "Phenolic Aromatic",
                           compound == "Niacin" ~ "B-Vitamin",
                              TRUE ~ "Indole"),
         compound = case_when(class == "Conjugated Primary Bile Acid" ~ paste(compound, "(1ËšConj. BA)"),
                              class == "Primary Bile Acid" ~ paste(compound, "(1Ëš BA)"),
                              class == "Secondary Bile Acid" ~ paste(compound, "(2Ëš BA)"),
                              class == "Short-Chain Fatty Acid" ~ paste(compound, "(SCFA)"),
                              class == "Amino Acid" ~ paste(compound, "(AA)"),
                              class == "Phenolic Aromatic" ~ paste(compound, "(Phen. Arom.)"),
                              class == "Indole" ~ paste0(compound, "(Indole)"),
                              class == "Kynurenine Metabolite" ~ paste(compound, "(Kyn. Metab.)"),
                              class == "B-Vitamin" ~ paste(compound, "(B-Vitamin)")
                              )) %>% 
  drop_na() %>% 
  group_by(compound) %>% 
  mutate(count = length(unique(diversity_group_abv))) %>% 
  filter(count == 4) %>% 
  select(-count) %>%
  mutate(compound = factor(
    compound,
    levels = c(
      "Acetate (SCFA)",                  # 1
      "Propionate (SCFA)",               # 2
      "Butyrate (SCFA)",                 # 3
      "Succinate (SCFA)",                # 4
      "Tyrosine (AA)",                   # 5
      "Tryptophan (AA)",                 # 6
      "Phenylalanine (AA)",              # 7
      "Cholic Acid (1Ëš BA)",             # 8
      "Glycocholic Acid (1ËšConj. BA)",   # 9
      "Taurocholic Acid (1ËšConj. BA)",   # 10
      "Deoxycholic Acid (2Ëš BA)",        # 11
      "Lithocholic Acid (2Ëš BA)",        # 12
      "Isodeoxycholic Acid (2Ëš BA)",     # 13
      "3-Oxolithocholic Acid (2Ëš BA)",   # 14
      "Alloisolithocholic Acid (2Ëš BA)", # 15
      "Desaminotyrosine (Phen. Arom.)",  # 16
      "Kynurenine (Kyn. Metab.)",        # 17
      "Anthranilic Acid (Kyn. Metab.)",  # 18
      "Tryptamine (Kyn. Metab.)",        # 19
      "Niacin (B-Vitamin)"               # 20
    )
  ),
  class = factor(
    class,
    levels = c(
      "Short-Chain Fatty Acid",          # 1
      "Amino Acid",                      # 2
      "Primary Bile Acid",               # 3
      "Conjugated Primary Bile Acid",    # 4
      "Secondary Bile Acid",             # 5
      "Indole",                          # 6
      "Phenolic Aromatic",               # 7
      "Kynurenine Metabolite",           # 8
      "B-Vitamin"                        # 9
    )
  )) %>% 
  filter(compound != "Kynurenic Acid") %>% 
  mutate(db = factor(db, levels = c("Liver Transplant", "Healthy Donor")),
         diversity_group_abv = factor(diversity_group_abv, levels = c("Low", "Medium", "High", "Healthy Donor"))) %>% 
  filter(compound %in% c(
    "Acetate (SCFA)",
    "Butyrate (SCFA)",
    "Propionate (SCFA)",
    "Glycocholic Acid (1ËšConj. BA)",
    "Taurocholic Acid (1ËšConj. BA)",
    "Cholic Acid (1Ëš BA)",
    "Deoxycholic Acid (2Ëš BA)",
    "Lithocholic Acid (2Ëš BA)",
    "Alloisolithocholic Acid (2Ëš BA)"
    ))


metab_boxplot_stats <-
  metab_boxplot %>% 
  group_by(class, compound) %>% 
  rstatix::wilcox_test(mvalue__mM~diversity_group_abv,
                       comparisons = diversity_comps,
                       p.adjust.method = "none",
                       alternative= "two.sided") %>% 
  rstatix::adjust_pvalue(method = "BH") %>%
  rstatix::add_significance("p.adj") %>% 
  mutate(p.adj = ifelse(p.adj < 0.001, "p.adj < 0.001", paste("p.adj = ", round(p.adj, 2)))) %>% 
  mutate(y.position = case_when(group1 == "High" & group2 == "Healthy Donor" ~ 2.30,
                                group1 == "Medium" & group2 == "High" ~ 2.7,
                                group1 == "Low" & group2 == "High" ~ 3.1,
                                group1 == "Low" & group2 == "Medium" ~ 3.5)) # Set stats brackets to fixed points since the y-scale will be log10 transformed


set.seed(123) # for consistent jittering of points
gg_metab_boxplot <-
ggboxplot(metab_boxplot,
          x = "diversity_group_abv",
          y = "mvalue__mM",
          fill = "db",
          color = "diversity_group_abv",
          alpha = 0.65,
          outlier.shape = NA,
          facet.by = c("class", "compound")) +
    theme(legend.text = et(size = 12, color = "black"),
          legend.title = et(size = 14, color = "black"),
          axis.title.x = eb(),
          axis.title.y = et(size = 12, color = "black"),
          panel.border = eb(),
          strip.background = er(colour="white", fill="white"),
          ) +
    geom_hline(yintercept = 0) + 
    geom_segment(aes(x = 0.35, y = 0, xend = 0.35, yend = Inf)) +
    facet_wrap(~compound, scales = "fixed") +
    stat_pvalue_manual(metab_boxplot_stats, 
                       tip.length = 0.015) +
  geom_point(
    data = metab_boxplot,
    aes(x = diversity_group_abv, y = mvalue__mM, color = diversity_group_abv),
    position = position_jitter(width = 0.2),
    size = 2,
    alpha = 0.65) +
    scale_fill_manual("Cohort", values = rev(pirate_colors)) +
    scale_color_manual("Diversity Group", values = diversity_group_colors) +
    scale_y_log10(limits = c(0.01, 5000),
                  labels = c(0.01, 0.1, 1, 10, 100, 1000),
                  breaks = c(0.01, 0.1, 1, 10, 100, 1000),
                  expand = expansion(mult = c(0.1, 0.2))) +
    ylab("Concentration (mM)\n")

gg_metab_boxplot

cairo_pdf(filename = "./Results/Figure_3.pdf", width = 14, height = 10, onefile = FALSE)

gg_metab_boxplot

invisible(dev.off())

Figure 4A: Enterococcus Expansion and Infection

gg_ecoc_all_patients <- peri_matrix_all %>%
    distinct(sampleID, .keep_all = T) %>%
    select(sampleID, patientID, bact_infection_present) %>%
    mutate(bact_infection_present = ifelse(bact_infection_present ==
        "No", "No Infection", "Infection"), bact_infection_present = factor(bact_infection_present,
        levels = c("Infection", "No Infection"))) %>%
    left_join(metaphlan_peri_anon %>%
        select(-bact_infection_present) %>%
        group_by(patientID, sampleID) %>%
        filter(grepl(x = Species, pattern = "Enterococcus", ignore.case = T)) %>%
        count(sampleID, wt = pctseqs, name = "enterococcus_rel_abundance")) %>%
    left_join(metaphlan_peri_anon %>%
        select(patientID, sampleID, Kingdom:pctseqs)) %>%
    ungroup() %>%
    arrange(Kingdom, Phylum, Class, Order, Family, Genus) %>%
    mutate(Genus = paste0(Phylum, "-", Order, "-", Family, "-",
        Genus)) %>%
    group_by(sampleID) %>%
    arrange(Genus) %>%
    mutate(cum.pct = cumsum(pctseqs), y.text = (cum.pct + c(0,
        cum.pct[-length(cum.pct)]))/2) %>%
    ungroup() %>%
    dplyr::select(-cum.pct) %>%
    mutate(sampleID = ifelse(is.na(sampleID), patientID, sampleID),
        Genus = factor(Genus, levels = unique(Genus)), Genus = fct_relevel(Genus,
            "Firmicutes-Lactobacillales-Enterococcaceae-Enterococcus",
            after = Inf)) %>%
    arrange(-enterococcus_rel_abundance) %>%
    group_by(sampleID) %>%
    ggplot(aes(x = reorder(sampleID, -enterococcus_rel_abundance),
        y = pctseqs)) + geom_bar(aes(fill = Genus), stat = "identity") +
    theme_bw() + theme(legend.position = "none", axis.text.x = et(angle = 90,
    hjust = 0.5), strip.text.x = et(angle = 0, size = 14), strip.background = eb(),
    axis.title.y = et(color = "black", size = 12), axis.text.y = et(color = "black",
        size = 10)) + scale_fill_manual(values = metaphlan_pal2) +
    scale_y_continuous(expand = expansion(mult = c(0.005, 0.005))) +
    ylab("Shotgun Relative Abundance (%)\n") + xlab("") + facet_grid(~bact_infection_present,
    space = "free_x", scales = "free_x")
# gg_ecoc_all_patients


# Discrete heatmap of infections
ecoc_infx_orgs <- peri_criteria_all %>%
    filter(sampleID %in% peri_matrix_all$sampleID) %>%
    group_by(patientID, eday) %>%
    arrange(-infx_stool) %>%
    dplyr::slice(1) %>%
    ungroup() %>%
    select(patientID, sampleID, bact_infection_present, infx_stool,
        organism1, micro1.factor) %>%
    distinct() %>%
    mutate(organism1 = gsub(x = organism1, pattern = "\\s+",
        replacement = ""), organism1 = str_to_lower(string = organism1),
        organism1 = ifelse(grepl(x = organism1, pattern = "enterococcus|enterobacterales|klebsiella|escherichia|citrobacter|proteus|staphyl|clostrid|pseudo|steno|bacteroides|helico"),
            organism1, "Culture Negative")) %>%
    group_by(patientID, sampleID, infx_stool) %>%
    mutate(`Enterococcus faecium` = grepl(x = organism1, pattern = "enterococcusfaecium",
        ignore.case = T), `Enterococcus faecalis` = grepl(x = organism1,
        pattern = "enterococcusfaecalis", ignore.case = T), `Enterococcus avium` = grepl(x = organism1,
        pattern = "enterococcusavium", ignore.case = T), `Enterococcus gallinarum` = grepl(x = organism1,
        pattern = "enterococcusgallinarum", ignore.case = T),
        `Klebsiella pneumoniae` = grepl(x = organism1, pattern = "klebsiellapneumoniae",
            ignore.case = T), `Enterobacter cloaceae` = grepl(x = organism1,
            pattern = "enterobactercloaceae", ignore.case = T),
        `Escherichia coli` = grepl(x = organism1, pattern = "escherichiacoli",
            ignore.case = T), `Citrobacter freundii` = grepl(x = organism1,
            pattern = "citrobacterfreundii", ignore.case = T),
        `Proteus mirabilis` = grepl(x = organism1, pattern = "proteusmirabilis",
            ignore.case = T), `Staphylococcus aureus` = grepl(x = organism1,
            pattern = "staphylococcusaureus", ignore.case = T),
        `Staphylococcus epidermis` = grepl(x = organism1, pattern = "staphylococcusepidermidis",
            ignore.case = T), `Pseudomonas aeruginosa` = grepl(x = organism1,
            pattern = "pseudomonasaeruginosa", ignore.case = T),
        `Stenotrophmonas maltophilia` = grepl(x = organism1,
            pattern = "stenotrophmonasmaltophilia", ignore.case = T),
        `Helicobacter pylori` = grepl(x = organism1, pattern = "helicobacterpylori",
            ignore.case = T), `Clostridium difficile` = grepl(x = organism1,
            pattern = "clostridiumdifficile|clostridioidesdifficile",
            ignore.case = T), `Bacteroides sp.` = grepl(x = organism1,
            pattern = "bacteroides", ignore.case = T), `Culture Negative` = grepl(x = organism1,
            pattern = "Culture Negative", ignore.case = T)) %>%
    pivot_longer(-c(patientID:micro1.factor), names_to = "organisms",
        values_to = "org_presence") %>%
    mutate(organisms = ifelse(bact_infection_present == "No",
        "No Bacterial Infection", organisms), org_presence = ifelse(org_presence ==
        TRUE, 1, 0)) %>%
    group_by(sampleID, infx_stool, bact_infection_present, organisms) %>%
    dplyr::slice_max(org_presence) %>%
    ungroup() %>%
    filter(org_presence == 1) %>%
    left_join(metaphlan_peri_anon %>%
        select(-bact_infection_present) %>%
        group_by(patientID, sampleID) %>%
        filter(grepl(x = Species, pattern = "Enterococcus", ignore.case = T)) %>%
        count(sampleID, wt = pctseqs, name = "enterococcus_rel_abundance")) %>%
    left_join(metaphlan_peri_anon %>%
        select(patientID, sampleID, Kingdom:pctseqs)) %>%
    group_by(patientID, sampleID, organisms, org_presence) %>%
    dplyr::slice(1) %>%
    mutate(sampleID = ifelse(is.na(sampleID), patientID, sampleID)) %>%
    mutate(org_presence = ifelse(grepl(pattern = "No", x = bact_infection_present,
        ignore.case = T), 0, org_presence)) %>%
    ungroup() %>%
    mutate(organisms = ifelse(bact_infection_present == "Yes" &
        org_presence == 0, "Other", organisms)) %>%
    arrange(-enterococcus_rel_abundance) %>%
    mutate(organisms = ifelse(grepl(x = organisms, pattern = "enterococcus|klebsiella|escherichia|proteus|citrobacter|culture",
        ignore.case = TRUE), organisms, "Other Bacterial Infection"),
        organisms = ifelse(bact_infection_present == "No", "No Bacterial Infection",
            organisms))

ecoc_infx_orgs_order <- ecoc_infx_orgs %>%
    group_by(sampleID, patientID, organisms) %>%
    distinct(sampleID, patientID, .keep_all = T) %>%
    ungroup() %>%
    mutate(organisms = ifelse(grepl(organisms, pattern = "gallinarum"),
        "Other Bacterial Infection", organisms), org_colors = case_when(grepl(x = organisms,
        pattern = "enterococcus faecium", ignore.case = T) ~
        1, grepl(x = organisms, pattern = "enterococcus faecalis",
        ignore.case = T) ~ 2, grepl(x = organisms, pattern = "enterococcus avium",
        ignore.case = T) ~ 3, grepl(x = organisms, pattern = "klebsiella pneumoniae",
        ignore.case = T) ~ 4, grepl(x = organisms, pattern = "escherichia coli",
        ignore.case = T) ~ 5, grepl(x = organisms, pattern = "proteus mirabilis",
        ignore.case = T) ~ 6, grepl(x = organisms, pattern = "citrobacter freundii",
        ignore.case = T) ~ 7, grepl(x = organisms, pattern = "other bacterial infection|gallinarum",
        ignore.case = T) ~ 8, grepl(x = organisms, pattern = "culture negative",
        ignore.case = T) ~ 9, TRUE ~ 0), organisms = as.factor(organisms),
        organisms = factor(organisms, levels = c("Enterococcus faecium",
            "Enterococcus faecalis", "Enterococcus avium", "Klebsiella pneumoniae",
            "Escherichia coli", "Proteus mirabilis", "Citrobacter freundii",
            "Other Bacterial Infection", "Culture Negative",
            "No Bacterial Infection")), org_colors = factor(org_colors,
            levels = c("1", "2", "3", "4", "5", "6", "7", "8",
                "9", "0"))) %>%
    left_join(ecoc_infx_orgs) %>%
    mutate(organisms = factor(organisms, levels = c("Enterococcus faecium",
        "Enterococcus faecalis", "Enterococcus avium", "Klebsiella pneumoniae",
        "Escherichia coli", "Proteus mirabilis", "Citrobacter freundii",
        "Other Bacterial Infection", "Culture Negative", "No Bacterial Infection")),
        org_colors = factor(org_colors, levels = c("1", "2",
            "3", "4", "5", "6", "7", "8", "9", "0"))) %>%
    drop_na(organisms) %>%
    mutate(bact_infection_present = ifelse(bact_infection_present ==
        "No", "No Infection", "Infection"), bact_infection_present = factor(bact_infection_present,
        levels = c("Infection", "No Infection")))

gg_ecoc_infx_orgs <- ecoc_infx_orgs_order %>%
    droplevels() %>%
    ungroup() %>%
    ggplot(., aes(x = reorder(sampleID, -enterococcus_rel_abundance),
        y = organisms, fill = as.factor(org_colors))) + geom_tile(color = "black") +
    theme_bw() + theme(axis.title.y = et(color = "black", size = 12),
    axis.text.y = et(color = "black", size = 10), axis.ticks.x = eb(),
    axis.text.x = eb(), axis.title.x = eb(), panel.grid = eb(),
    strip.text = eb()) + scale_fill_manual(values = c("#129246",
    "#0C7A3A", "#08592B", "#FF0000", "#CC0404", "#8A0202", "#5C0202",
    "#E6C66E", "#BD992D", "#00000000"), labels = c("Enterococcus faecium",
    "Enterococcus faecalis", "Enterococcus avium", "Klebsiella pneumoniae",
    "Escherichia coli", "Citrobacter freundii", "Proteus mirabilis",
    "Other Bacterial Infection", "Culture Negative", "No Bacterial Infection")) +
    labs(fill = "Infecting Organism", y = "Infecting Organism\n") +
    scale_y_discrete(expand = expansion(mult = c(0.005, 0.005)),
        limits = rev(levels(ecoc_infx_orgs_order$organisms))) +
    facet_grid(. ~ bact_infection_present, space = "free_x",
        scales = "free_x")

# gg_ecoc_infx_orgs

yingtools2::gg.stack(gg_ecoc_all_patients, gg_ecoc_infx_orgs,
    heights = c(1, 0.5), adjust.themes = T)

pdf(file = "./Results/Figure_4A.pdf", height = 8, width = 15,
    onefile = F)

yingtools2::gg.stack(gg_ecoc_all_patients, gg_ecoc_infx_orgs,
    heights = c(1, 0.5), adjust.themes = T)

invisible(dev.off())

Figure 4B: Enterobacterales Expansion and Infection

gg_ebac_all_patients <- peri_matrix_all %>%
    distinct(sampleID, .keep_all = T) %>%
    select(sampleID, patientID, bact_infection_present) %>%
    mutate(bact_infection_present = ifelse(bact_infection_present ==
        "No", "No Infection", "Infection"), bact_infection_present = factor(bact_infection_present,
        levels = c("Infection", "No Infection"))) %>%
    left_join(metaphlan_peri_anon %>%
        select(-bact_infection_present) %>%
        group_by(patientID, sampleID) %>%
        filter(grepl(x = Species, pattern = "Klebsiella pneumoniae|Escherichia coli|Citrobacter freundii|Proteus mirabilis")) %>%
        count(sampleID, wt = pctseqs, name = "enterobacterales_rel_abundance")) %>%
    left_join(metaphlan_peri_anon %>%
        select(patientID, sampleID, Kingdom:pctseqs)) %>%
    ungroup() %>%
    arrange(Kingdom, Phylum, Class, Order, Family, Genus) %>%
    mutate(Genus = paste0(Phylum, "-", Order, "-", Family, "-",
        Genus)) %>%
    group_by(sampleID) %>%
    arrange(Genus) %>%
    mutate(cum.pct = cumsum(pctseqs), y.text = (cum.pct + c(0,
        cum.pct[-length(cum.pct)]))/2) %>%
    ungroup() %>%
    dplyr::select(-cum.pct) %>%
    mutate(sampleID = ifelse(is.na(sampleID), patientID, sampleID),
        Genus = factor(Genus, levels = unique(Genus)), Genus = fct_relevel(Genus,
            c("Proteobacteria-Enterobacterales-Enterobacteriaceae-Citrobacter",
                "Proteobacteria-Enterobacterales-Enterobacteriaceae-Enterobacter",
                "Proteobacteria-Enterobacterales-Enterobacteriaceae-Escherichia",
                "Proteobacteria-Enterobacterales-Enterobacteriaceae-Klebsiella",
                "Proteobacteria-Enterobacterales-Morganellaceae-Proteus"),
            after = Inf)) %>%
    arrange(-enterobacterales_rel_abundance) %>%
    ggplot(aes(x = reorder(sampleID, -enterobacterales_rel_abundance),
        y = pctseqs)) + geom_bar(aes(fill = Genus), stat = "identity") +
    theme_bw() + theme(legend.position = "none", axis.text.x = et(angle = 90,
    hjust = 0.5), strip.text.x = et(angle = 0, size = 14), strip.background = eb(),
    axis.title.y = et(color = "black", size = 12), axis.text.y = et(color = "black",
        size = 10)) + scale_fill_manual(values = metaphlan_pal2) +
    scale_y_continuous(expand = expansion(mult = c(0.005, 0.005))) +
    ylab("Shotgun Relative Abundance (%)\n") + xlab("") + facet_grid(~bact_infection_present,
    space = "free_x", scales = "free_x")
# gg_ebac_all_patients

# Discrete heatmap of infections
ebac_infx_orgs <- peri_criteria_all %>%
    filter(sampleID %in% peri_matrix_all$sampleID) %>%
    group_by(patientID, eday) %>%
    arrange(-infx_stool) %>%
    dplyr::slice(1) %>%
    ungroup() %>%
    select(patientID, sampleID, bact_infection_present, infx_stool,
        organism1, micro1.factor) %>%
    distinct() %>%
    mutate(organism1 = gsub(x = organism1, pattern = "\\s+",
        replacement = ""), organism1 = str_to_lower(string = organism1),
        organism1 = ifelse(grepl(x = organism1, pattern = "enterococcus|enterobacterales|klebsiella|escherichia|citrobacter|proteus|staphyl|clostrid|pseudo|steno|bacteroides|helico"),
            organism1, "Culture Negative")) %>%
    group_by(patientID, sampleID, infx_stool) %>%
    mutate(`Enterococcus faecium` = grepl(x = organism1, pattern = "enterococcusfaecium",
        ignore.case = T), `Enterococcus faecalis` = grepl(x = organism1,
        pattern = "enterococcusfaecalis", ignore.case = T), `Enterococcus avium` = grepl(x = organism1,
        pattern = "enterococcusavium", ignore.case = T), `Enterococcus gallinarum` = grepl(x = organism1,
        pattern = "enterococcusgallinarum", ignore.case = T),
        `Klebsiella pneumoniae` = grepl(x = organism1, pattern = "klebsiellapneumoniae",
            ignore.case = T), `Enterobacter cloaceae` = grepl(x = organism1,
            pattern = "enterobactercloaceae", ignore.case = T),
        `Escherichia coli` = grepl(x = organism1, pattern = "escherichiacoli",
            ignore.case = T), `Citrobacter freundii` = grepl(x = organism1,
            pattern = "citrobacterfreundii", ignore.case = T),
        `Proteus mirabilis` = grepl(x = organism1, pattern = "proteusmirabilis",
            ignore.case = T), `Staphylococcus aureus` = grepl(x = organism1,
            pattern = "staphylococcusaureus", ignore.case = T),
        `Staphylococcus epidermis` = grepl(x = organism1, pattern = "staphylococcusepidermidis",
            ignore.case = T), `Pseudomonas aeruginosa` = grepl(x = organism1,
            pattern = "pseudomonasaeruginosa", ignore.case = T),
        `Stenotrophmonas maltophilia` = grepl(x = organism1,
            pattern = "stenotrophmonasmaltophilia", ignore.case = T),
        `Helicobacter pylori` = grepl(x = organism1, pattern = "helicobacterpylori",
            ignore.case = T), `Clostridium difficile` = grepl(x = organism1,
            pattern = "clostridiumdifficile|clostridioidesdifficile",
            ignore.case = T), `Bacteroides sp.` = grepl(x = organism1,
            pattern = "bacteroides", ignore.case = T), `Culture Negative` = grepl(x = organism1,
            pattern = "Culture Negative", ignore.case = T)) %>%
    pivot_longer(-c(patientID:micro1.factor), names_to = "organisms",
        values_to = "org_presence") %>%
    mutate(organisms = ifelse(bact_infection_present == "No",
        "No Bacterial Infection", organisms), org_presence = ifelse(org_presence ==
        TRUE, 1, 0)) %>%
    group_by(sampleID, infx_stool, bact_infection_present, organisms) %>%
    dplyr::slice_max(org_presence) %>%
    ungroup() %>%
    filter(org_presence == 1) %>%
    left_join(metaphlan_peri_anon %>%
        select(-bact_infection_present) %>%
        group_by(patientID, sampleID) %>%
        filter(grepl(x = Species, pattern = "Klebsiella pneumoniae|Escherichia coli|Citrobacter freundii|Proteus mirabilis")) %>%
        count(sampleID, wt = pctseqs, name = "enterobacterales_rel_abundance")) %>%
    left_join(metaphlan_peri_anon %>%
        select(patientID, sampleID, Kingdom:pctseqs)) %>%
    group_by(patientID, sampleID, organisms, org_presence) %>%
    dplyr::slice(1) %>%
    mutate(sampleID = ifelse(is.na(sampleID), patientID, sampleID)) %>%
    mutate(org_presence = ifelse(grepl(pattern = "No", x = bact_infection_present,
        ignore.case = T), 0, org_presence)) %>%
    ungroup() %>%
    mutate(organisms = ifelse(bact_infection_present == "Yes" &
        org_presence == 0, "Other", organisms)) %>%
    arrange(-enterobacterales_rel_abundance) %>%
    mutate(organisms = ifelse(grepl(x = organisms, pattern = "enterococcus|klebsiella|escherichia|proteus|citrobacter|culture",
        ignore.case = TRUE), organisms, "Other Bacterial Infection"),
        organisms = ifelse(bact_infection_present == "No", "No Bacterial Infection",
            organisms))

ebac_infx_orgs_order <- ebac_infx_orgs %>%
    group_by(sampleID, patientID, organisms) %>%
    distinct(sampleID, patientID, .keep_all = T) %>%
    ungroup() %>%
    mutate(organisms = ifelse(grepl(organisms, pattern = "gallinarum"),
        "Other Bacterial Infection", organisms), org_colors = case_when(grepl(x = organisms,
        pattern = "enterococcus faecium", ignore.case = T) ~
        1, grepl(x = organisms, pattern = "enterococcus faecalis",
        ignore.case = T) ~ 2, grepl(x = organisms, pattern = "enterococcus avium",
        ignore.case = T) ~ 3, grepl(x = organisms, pattern = "klebsiella pneumoniae",
        ignore.case = T) ~ 4, grepl(x = organisms, pattern = "escherichia coli",
        ignore.case = T) ~ 5, grepl(x = organisms, pattern = "proteus mirabilis",
        ignore.case = T) ~ 6, grepl(x = organisms, pattern = "citrobacter freundii",
        ignore.case = T) ~ 7, grepl(x = organisms, pattern = "other bacterial infection|gallinarum",
        ignore.case = T) ~ 8, grepl(x = organisms, pattern = "culture negative",
        ignore.case = T) ~ 9, TRUE ~ 0), organisms = as.factor(organisms),
        organisms = factor(organisms, levels = c("Klebsiella pneumoniae",
            "Escherichia coli", "Proteus mirabilis", "Citrobacter freundii",
            "Enterococcus faecium", "Enterococcus faecalis",
            "Enterococcus avium", "Other Bacterial Infection",
            "Culture Negative", "No Bacterial Infection")), org_colors = factor(org_colors,
            levels = c("4", "5", "6", "7", "1", "2", "3", "8",
                "9", "0"))) %>%
    left_join(ebac_infx_orgs) %>%
    mutate(organisms = factor(organisms, levels = c("Klebsiella pneumoniae",
        "Escherichia coli", "Proteus mirabilis", "Citrobacter freundii",
        "Enterococcus faecium", "Enterococcus faecalis", "Enterococcus avium",
        "Other Bacterial Infection", "Culture Negative", "No Bacterial Infection")),
        org_colors = factor(org_colors, levels = c("4", "5",
            "6", "7", "1", "2", "3", "8", "9", "0"))) %>%
    drop_na(organisms) %>%
    mutate(bact_infection_present = ifelse(bact_infection_present ==
        "No", "No Infection", "Infection"), bact_infection_present = factor(bact_infection_present,
        levels = c("Infection", "No Infection")))

gg_ebac_infx_orgs <- ebac_infx_orgs_order %>%
    ggplot(., aes(x = reorder(sampleID, -enterobacterales_rel_abundance),
        y = organisms, fill = as.factor(org_colors))) + geom_tile(color = "black") +
    theme_bw() + theme(axis.title.y = et(color = "black", size = 12),
    axis.text.y = et(color = "black", size = 10), axis.ticks.x = eb(),
    axis.text.x = eb(), axis.title.x = eb(), panel.grid = eb(),
    strip.text = eb()) + scale_fill_manual(values = c("#FF0000",
    "#CC0404", "#8A0202", "#5C0202", "#129246", "#0C7A3A", "#08592B",
    "#E6C66E", "#BD992D", "#00000000"), labels = c("Klebsiella pneumoniae",
    "Escherichia coli", "Citrobacter freundii", "Proteus mirabilis",
    "Enterococcus faecium", "Enterococcus faecalis", "Enterococcus avium",
    "Other Bacterial Infection", "Culture Negative", "No Bacterial Infection")) +
    labs(fill = "Infecting Organism", y = "Infecting Organism\n") +
    scale_y_discrete(expand = expansion(mult = c(0.005, 0.005)),
        limits = rev(levels(ebac_infx_orgs_order$organisms))) +
    facet_grid(. ~ bact_infection_present, space = "free_x",
        scales = "free_x")

# gg_ebac_infx_orgs

yingtools2::gg.stack(gg_ebac_all_patients, gg_ebac_infx_orgs,
    heights = c(1, 0.5), adjust.themes = T)

pdf(file = "./Results/Figure_4B.pdf", height = 8, width = 15,
    onefile = F)

yingtools2::gg.stack(gg_ebac_all_patients, gg_ebac_infx_orgs,
    heights = c(1, 0.5), adjust.themes = T)

invisible(dev.off())

Figure 5B: Quant Metabolomics-Enterococcus Expansion

# Qual compounds that we can quantify and that are also
# significant in the volcano analysis for both
signif_compounds <- qual_tot_ecoc_expan %>%
    rownames_to_column(var = "compound") %>%
    filter(p.adj <= 0.05 & abs(log2fc_val) > 0.75) %>%
    distinct(compound)

metab_boxplot_ecoc_expan <- peri_matrix_all %>%
    select(sampleID, enterococcus_rel_abundance) %>%
    left_join(sample_lookup) %>%
    group_by(sampleID) %>%
    slice(1) %>%
    left_join(metab_quant_converted %>%
        filter(compound %in% signif_compounds$compound)) %>%
    filter(compound %!in% c("Kynurenine", "Anthranilic Acid")) %>%
    mutate(compound = ifelse(compound == "isovaleric-acid", "isovalerate",
        compound), compound = str_to_title(compound)) %>%
    ungroup() %>%
    mutate(db = ifelse(db == "HealthyDonor", "Healthy Donor",
        "Liver Transplant") %>%
        factor(., levels = c("Healthy Donor", "Liver Transplant")),
        class = case_when(compound %in% c("Taurocholic Acid",
            "Glycocholic Acid") ~ "Conjugated Primary Bile Acid",
            compound %in% c("Cholic Acid") ~ "Primary Bile Acid",
            compound %in% c("3-Oxolithocholic Acid", "Alloisolithocholic Acid",
                "Deoxycholic Acid", "Isodeoxycholic Acid", "Lithocholic Acid") ~
                "Secondary Bile Acid", compound %in% c("Threonine",
                "Glycine", "Tyrosine", "Tyramine", "Serine",
                "Leucine", "Isoleucine", "Valine", "Phenylalanine",
                "Alanine", "Proline", "Aspartate", "Methionine",
                "Glutamate", "Lysine", "Cysteine", "Tryptophan") ~
                "Amino Acid", compound %in% c("Acetate", "Butyrate",
                "Succinate", "Propionate") ~ "Short-Chain Fatty Acid",
            compound %in% c("Kynurenic Acid", "Anthranilic Acid",
                "Kynurenine", "Tryptamine") ~ "Kynurenine Metabolite",
            compound == "Desaminotyrosine" ~ "Phenolic Aromatic",
            compound == "Niacin" ~ "B-Vitamin", TRUE ~ "Indole")) %>%
    drop_na() %>%
    group_by(compound) %>%
    mutate(class = factor(class, levels = c("Conjugated Primary Bile Acid",
        "Primary Bile Acid", "Secondary Bile Acid", "Short-Chain Fatty Acid",
        "Amino Acid", "Phenolic Aromatic", "Indole", "Kynurenine Metabolite",
        "B-Vitamin")), compound = case_when(class == "Conjugated Primary Bile Acid" ~
        paste(compound, "(1ËšConj. BA)"), class == "Primary Bile Acid" ~
        paste(compound, "(1Ëš BA)"), class == "Secondary Bile Acid" ~
        paste(compound, "(2Ëš BA)"), class == "Short-Chain Fatty Acid" ~
        paste(compound, "(SCFA)"), class == "Amino Acid" ~ paste(compound,
        "(AA)"), class == "Phenolic Aromatic" ~ paste(compound,
        "(Phen. Arom.)"), class == "Indole" ~ paste0(compound,
        "(Indole)"), class == "Kynurenine Metabolite" ~ paste(compound,
        "(Kyn. Metab.)"), class == "B-Vitamin" ~ paste(compound,
        "(B-Vitamin)")), compound = factor(compound, levels = c("Acetate (SCFA)",
        "Butyrate (SCFA)", "Propionate (SCFA)", "Succinate (SCFA)",
        "Taurocholic Acid (1ËšConj. BA)", "Glycocholic Acid (1ËšConj. BA)",
        "Cholic Acid (1Ëš BA)", "3-Oxolithocholic Acid (2Ëš BA)",
        "Alloisolithocholic Acid (2Ëš BA)", "Deoxycholic Acid (2Ëš BA)",
        "Isodeoxycholic Acid (2Ëš BA)", "Lithocholic Acid (2Ëš BA)",
        "Kynurenic Acid (Kyn. Metab.)", "Kynurenine (Kyn. Metab.)",
        "Anthranilic Acid (Kyn. Metab.)", "Desaminotyrosine",
        "Niacin", "Tyrosine", "Tryptamine", "Tryptophan", "Phenylalanine"))) %>%
    ungroup() %>%
    drop_na() %>%
    mutate(enterococcus_expansion = ifelse(enterococcus_rel_abundance >=
        optimal_cutpoint_rel$optimal_cutpoint[2], 1, 0)) %>%
    arrange(enterococcus_expansion, sampleID) %>%
    group_by(compound) %>%
    mutate(enterococcus_expansion_0 = length(mvalue__mM[enterococcus_expansion ==
        "0"]), enterococcus_expansion_1 = length(mvalue__mM[enterococcus_expansion ==
        "1"])) %>%
    filter(any(mvalue__mM != 0)) %>%
    mutate(enterococcus_expansion = ifelse(enterococcus_expansion ==
        "1", "Expansion", "No Expansion"))


metab_boxplot_summary_stats_ecoc_expan <- metab_boxplot_ecoc_expan %>%
    group_by(compound) %>%
    summarise(y.position = max(mvalue__mM) * 1.1)

metab_boxplot_stats_ecoc_expan <- metab_boxplot_ecoc_expan %>%
    group_by(class, compound) %>%
    rstatix::wilcox_test(mvalue__mM ~ enterococcus_expansion) %>%
    rstatix::adjust_pvalue(method = "BH") %>%
    rstatix::add_significance("p.adj") %>%
    mutate(p.adj = ifelse(p.adj < 0.001, "p.adj < 0.001", paste("p.adj = ",
        round(p.adj, 3)))) %>%
    add_xy_position(x = "enterococcus_expansion") %>%
    select(-y.position) %>%
    left_join(metab_boxplot_summary_stats_ecoc_expan)


gg_metab_boxplot_ecoc_expan <- ggboxplot(metab_boxplot_ecoc_expan,
    x = "enterococcus_expansion", y = "mvalue__mM", fill = "enterococcus_expansion",
    alpha = 0.65, outlier.shape = NA) + theme(legend.text = et(size = 14,
    color = "black"), legend.title = et(size = 14, color = "black"),
    axis.title.x = eb(), axis.title.y = et(size = 16, color = "black"),
    strip.text = et(size = 14, color = "black"), strip.background = eb(),
    panel.border = eb(), axis.line = eb()) + facet_wrap(~compound,
    scales = "free_y", nrow = 2) + annotate("segment", x = 0.35,
    xend = 0.35, y = 0, yend = Inf, colour = "black", linewidth = 1) +
    annotate("segment", x = 0.35, xend = Inf, y = 0, yend = 0,
        colour = "black") + stat_pvalue_manual(metab_boxplot_stats_ecoc_expan,
    label = "{p.adj}", bracket.size = 0.5) + geom_point(data = metab_boxplot_ecoc_expan,
    aes(x = enterococcus_expansion, y = mvalue__mM, fill = enterococcus_expansion),
    position = position_jitter(width = 0.2), size = 2, shape = 21,
    alpha = 0.65, color = "black") + scale_fill_manual("Enterococcus",
    values = c("gray87", "#389458")) + scale_y_continuous(expand = expansion(mult = c(0.1,
    0.2))) + ylab("Concentration (mM)\n")

gg_metab_boxplot_ecoc_expan

cairo_pdf(filename = "./Results/Figure_5B.pdf", height = 8, width = 16,
    onefile = TRUE)

gg_metab_boxplot_ecoc_expan

invisible(dev.off())

Figure 6A: sPLS-DA Diversity Groups and Healthy Donors using Qualitative Metabolomics

diversity_metab_mat <-
  metab_qual_anon %>% filter(sampleID %in% first_samps$sampleID) %>% 
  mutate(compound = ifelse(compound == "isovaleric-acid", "isovalerate", compound),
         compound = str_to_title(compound)) %>%
  filter(compound %in% heatmap_cmpds$compound|is.na(compound)) %>%
  ungroup() %>%
  left_join(metaphlan_df_sumry %>% select(sampleID, diversity_group_abv)) %>% 
  group_by(sampleID, compound, diversity_group_abv) %>% 
  summarise(mvalue = mean(mvalue, na.rm = TRUE)) %>% 
  ungroup() %>%
  mutate_all(~replace(., is.nan(.), NA)) %>% 
  select(sampleID, compound, mvalue, diversity_group_abv) %>% 
  drop_na(sampleID) %>% 
  pivot_wider(names_from = "compound", values_from = "mvalue") %>% 
  filter(sampleID != "") %>%
  column_to_rownames(var = "sampleID") %>% 
  select(-`NA`, - diversity_group_abv) %>% 
  filter_all(any_vars(!is.na(.)))

diversity_metab_labs <-
  diversity_metab_mat %>% 
  rownames_to_column(var = "sampleID") %>% 
  left_join(metaphlan_df_sumry %>% select(sampleID, diversity_group_abv)) %>% 
  droplevels() %>% 
  pull(diversity_group_abv)

dim(diversity_metab_mat) #123 93 (means 93 compounds and 102 LT patients/infections)
## [1] 102  93
length(diversity_metab_labs) #123 (means 102 LT patients/infections)
## [1] 102
# Begin model training
set.seed(1234)
diversity_train <- sample(1:nrow(diversity_metab_mat), as.integer(0.7*nrow(diversity_metab_mat))) # randomly select 70% samples in training
diversity_test <- setdiff(1:nrow(diversity_metab_mat), diversity_train) # rest is part of the test set

# store matrices into training and test set:
diversity_metab_mat.train <- diversity_metab_mat[diversity_train, ]
diversity_metab_mat.test <- diversity_metab_mat[diversity_test,]
diversity_metab_labs.train <- diversity_metab_labs[diversity_train]
diversity_metab_labs.test <- diversity_metab_labs[diversity_test]

# Train the model to tune hyperparameters
# Initial model to find optimal number of components to include
set.seed(1234)
diversity_train_splsda <- mixOmics::splsda(diversity_metab_mat.train, diversity_metab_labs.train, ncomp = 5)

# Performance assessment
## 5-fold, 50-repeat cross validation
set.seed(1234)
diversity_train_plsda_perf <-
  perf(
    diversity_train_splsda,
    validation = "Mfold",
    folds = 5,
    progressBar = FALSE,
    auc = TRUE,
    nrepeat = 50
  )

plot(
  diversity_train_plsda_perf,
  col = color.mixo(5:7),
  sd = FALSE,
  auc = TRUE,
  legend.position = "horizontal"
) # ncomp = 4 might be best for classification error rate and max.dist

# Number of optimal variables to select for each component
diversity_train_keepX <- c(1:10,  seq(20, 130, 10))

set.seed(123)
diversity_train_tune_splsda <-
  mixOmics::tune.splsda(
    diversity_metab_mat.train,
    diversity_metab_labs.train,
    ncomp = 4, # Choose 4 components (max) to be safe
    validation = 'Mfold',
    folds = 5,
    dist = 'max.dist',
    progressBar = FALSE,
    auc = TRUE,
    measure = "BER",
    test.keepX = diversity_train_keepX,
    nrepeat = 50
  )

plot(diversity_train_tune_splsda, col = color.jet(4))

diversity_train_error <- diversity_train_tune_splsda$error.rate
diversity_train_ncomp <- diversity_train_tune_splsda$choice.ncomp$ncomp # optimal number of components based on t-tests on the error rate
diversity_train_ncomp #2 components are optimal
## [1] 2
diversity_train_select_keepX <- diversity_train_tune_splsda$choice.keepX[1:diversity_train_ncomp]  # optimal number of variables to select per component
diversity_train_select_keepX
## comp1 comp2 
##    60    10
# Final Model
diversity_train_splsda_final <-
  mixOmics::splsda(diversity_metab_mat.train, diversity_metab_labs.train, ncomp = diversity_train_ncomp, keepX = diversity_train_select_keepX)

# Test the model
diversity_predict_train_splsda_final <- predict(diversity_train_splsda_final, diversity_metab_mat.test, 
                                dist = "max.dist")

diversity_predict_train_comp2 <- diversity_predict_train_splsda_final$class$max.dist[,diversity_train_ncomp]

diversity_union <- union(diversity_predict_train_comp2, diversity_metab_labs.test)

confusionMatrix(table(factor(diversity_predict_train_comp2, diversity_union,
                             levels = c("Low", "Medium", "High")),
                      factor(diversity_metab_labs.test, diversity_union,
                             levels = c("Low", "Medium", "High"))))
## Confusion Matrix and Statistics
## 
##         
##          Low High Medium
##   Low      8    7      5
##   High     0    3      2
##   Medium   0    1      5
## 
## Overall Statistics
##                                           
##                Accuracy : 0.5161          
##                  95% CI : (0.3306, 0.6985)
##     No Information Rate : 0.3871          
##     P-Value [Acc > NIR] : 0.099557        
##                                           
##                   Kappa : 0.3101          
##                                           
##  Mcnemar's Test P-Value : 0.006324        
## 
## Statistics by Class:
## 
##                      Class: Low Class: High Class: Medium
## Sensitivity              1.0000     0.27273        0.4167
## Specificity              0.4783     0.90000        0.9474
## Pos Pred Value           0.4000     0.60000        0.8333
## Neg Pred Value           1.0000     0.69231        0.7200
## Prevalence               0.2581     0.35484        0.3871
## Detection Rate           0.2581     0.09677        0.1613
## Detection Prevalence     0.6452     0.16129        0.1935
## Balanced Accuracy        0.7391     0.58636        0.6820
diversity_train_background <- background.predict(diversity_train_splsda_final,
                                            comp.predicted = 2,
                                            xlim = c(-20,20),
                                            ylim = c(-20,20),
                                            dist = "centroids.dist") 

# Model metrics for all samples
diversity_tot <- predict(diversity_train_splsda_final,
                        diversity_metab_mat,
                        dist = "max.dist")

diversity_tot_predict <- diversity_tot$class$max.dist[,diversity_train_ncomp]

diversity_tot_union <- union(diversity_tot_predict, diversity_metab_labs)

diversity_cm <- confusionMatrix(table(factor(diversity_tot_predict, diversity_tot_union,
                                        levels = c("Low", "Medium", "High")),
                      factor(diversity_metab_labs, diversity_tot_union,
                             levels = c("Low", "Medium", "High"))),
                        )
diversity_cm
## Confusion Matrix and Statistics
## 
##         
##          Low High Medium
##   Low     34   16      7
##   High     2   21      3
##   Medium   1    2     16
## 
## Overall Statistics
##                                           
##                Accuracy : 0.6961          
##                  95% CI : (0.5971, 0.7833)
##     No Information Rate : 0.3824          
##     P-Value [Acc > NIR] : 1.375e-10       
##                                           
##                   Kappa : 0.5341          
##                                           
##  Mcnemar's Test P-Value : 0.001377        
## 
## Statistics by Class:
## 
##                      Class: Low Class: High Class: Medium
## Sensitivity              0.9189      0.5385        0.6154
## Specificity              0.6462      0.9206        0.9605
## Pos Pred Value           0.5965      0.8077        0.8421
## Neg Pred Value           0.9333      0.7632        0.8795
## Prevalence               0.3627      0.3824        0.2549
## Detection Rate           0.3333      0.2059        0.1569
## Detection Prevalence     0.5588      0.2549        0.1863
## Balanced Accuracy        0.7825      0.7295        0.7880
# Additional model measures
diversity_epi <- mltest::ml_test(predicted = factor(diversity_tot_predict, levels = c("Low", "Medium", "High")),
                                 true = factor(diversity_metab_labs, levels = c("Low", "Medium", "High")))

diversity_cm_names <- diversity_cm$table
colnames(diversity_cm_names) <- c("Actual\nLow", "Actual\nMedium", "Actual\nHigh")
rownames(diversity_cm_names) <- c("Predicted\nLow", "Predicted\nMedium", "Predicted\nHigh")

diversity_confusion_df <- diversity_cm_names %>% 
  t() 

{
pdf(file = "./Results/Figure_6A.pdf", height = 10, width = 10)
plotIndiv(
  diversity_train_splsda_final,
  xlim = c(min(diversity_train_splsda_final$variates$X[,1])*1.05, max(diversity_train_splsda_final$variates$X[,1])*1.8),
  ylim = c(min(diversity_train_splsda_final$variates$X[,2])*1.15, max(diversity_train_splsda_final$variates$X[,2])*1.8),
  comp = c(1,2),
  pch = 1,
  ind.names = FALSE,
  legend = FALSE,
  background = diversity_train_background,
  col = c("#3A001E", "#8A0246", "#C20463"),
  star = TRUE,
  point.lwd = 0.5,
  title = NULL,
  size.title = 0.00001,
  style = "graphics",
  legend.title = "Diversity Group",
  X.label = paste0("Component 1 (", round(diversity_train_splsda_final$prop_expl_var$X[1] * 100), "%)"),
  Y.label = paste0("Component 2 (", round(diversity_train_splsda_final$prop_expl_var$X[2] * 100), "%)")
) 
addtable2plot(
  y = -6.68,
  x = min(diversity_train_splsda_final$variates$X[,1])*1.1,
  diversity_confusion_df,
  bty = "o",
  display.rownames = TRUE,
  hlines = TRUE,
  vlines = TRUE,
  cex = 0.75,
  bg = "white"
)
text(
  y = -4.75,
  x = min(diversity_train_splsda_final$variates$X[,1])*1.1,
  paste0("Overall ACC = ", 
                    paste0(formatC(round(diversity_cm$overall[1], 3)*100, digits = 1, format = "f"), "%"), " [",
                    paste0(formatC(round(diversity_cm$overall[3], 3)*100, digits = 1, format = "f"), "%"),", ",
                    paste0(formatC(round(diversity_cm$overall[4], 3)*100, digits = 1, format = "f"), "%"),"]"),
    cex = 0.75, adj = 0)

text(
  y = 3,
  x = 1.5,
  paste0("Low Diversity ACC = ", round(diversity_epi$balanced.accuracy[1]*100, 1), "%"),
    cex = 0.75, adj = 0, col = "#3A001E")
text(
  y = 2.7,
  x = 1.5,
  paste0("Low Diversity Sens. = ", round(diversity_epi$precision[1]*100, 1), "%"),
    cex = 0.75, adj = 0, col = "#3A001E")
text(
  y = 2.4,
  x = 1.5,
  paste0("Low Diversity Spec. = ", round(diversity_epi$specificity[1]*100, 1), "%"),
    cex = 0.75, adj = 0, col = "#3A001E")
# text(
#   y = 2.1,
#   x = 1.5,
#   paste0("Low Diversity OR = ", round(diversity_epi$DOR[1], 1)),
#     cex = 0.75, adj = 0, col = "#3A001E")

text(
  y = -5,
  x = 1.5,
  paste0("Medium Diversity ACC = ", round(diversity_epi$balanced.accuracy[2]*100, 1), "%"),
    cex = 0.75, adj = 0, col = "#8A0246")
text(
  y = -5.3,
  x = 1.5,
  paste0("Medium Diversity Sens. = ", round(diversity_epi$precision[2]*100, 1), "%"),
    cex = 0.75, adj = 0, col = "#8A0246")
text(
  y = -5.6,
  x = 1.5,
  paste0("Medium Diversity Spec. = ", round(diversity_epi$specificity[2]*100, 1), "%"),
    cex = 0.75, adj = 0, col = "#8A0246")
# text(
#   y = -5.9,
#   x = 1.5,
#   paste0("Medium Diversity OR = ", round(diversity_epi$DOR[2], 1)),
#     cex = 0.75, adj = 0, col = "#8A0246")

text(
  y = 3,
  x = min(diversity_train_splsda_final$variates$X[,1])*1.05,
  paste0("High Diversity ACC = ", round(diversity_epi$balanced.accuracy[3]*100, 1), "%"),
    cex = 0.75, adj = 0, col = "#C20463")
text(
  y = 2.7,
  x = min(diversity_train_splsda_final$variates$X[,1])*1.05,
  paste0("High Diversity Sens. = ", round(diversity_epi$precision[3]*100, 1), "%"),
    cex = 0.75, adj = 0, col = "#C20463")
text(
  y = 2.4,
  x = min(diversity_train_splsda_final$variates$X[,1])*1.05,
  paste0("High Diversity Spec. = ", round(diversity_epi$specificity[3]*100, 1), "%"),
    cex = 0.75, adj = 0, col = "#C20463")
# text(
#   y = 2.1,
#   x = min(diversity_train_splsda_final$variates$X[,1])*1.05,
#   paste0("High Diversity OR = ", round(diversity_epi$DOR[3], 1)),
#     cex = 0.75, adj = 0, col = "#C20463")

invisible(dev.off())
}

plotIndiv(
  diversity_train_splsda_final,
  xlim = c(min(diversity_train_splsda_final$variates$X[,1])*1.05, max(diversity_train_splsda_final$variates$X[,1])*1.8),
  ylim = c(min(diversity_train_splsda_final$variates$X[,2])*1.15, max(diversity_train_splsda_final$variates$X[,2])*1.8),
  comp = c(1,2),
  pch = 1,
  ind.names = FALSE,
  legend = FALSE,
  background = diversity_train_background,
  col = c("#3A001E", "#8A0246", "#C20463"),
  star = TRUE,
  point.lwd = 0.5,
  title = NULL,
  size.title = 0.00001,
  style = "graphics",
  legend.title = "Diversity Group",
  X.label = paste0("Component 1 (", round(diversity_train_splsda_final$prop_expl_var$X[1] * 100), "%)"),
  Y.label = paste0("Component 2 (", round(diversity_train_splsda_final$prop_expl_var$X[2] * 100), "%)")
) 
addtable2plot(
  y = -6.68,
  x = min(diversity_train_splsda_final$variates$X[,1])*1.1,
  diversity_confusion_df,
  bty = "o",
  display.rownames = TRUE,
  hlines = TRUE,
  vlines = TRUE,
  cex = 0.75,
  bg = "white"
)
text(
  y = -4.75,
  x = min(diversity_train_splsda_final$variates$X[,1])*1.1,
  paste0("Overall ACC = ", 
                    paste0(formatC(round(diversity_cm$overall[1], 3)*100, digits = 1, format = "f"), "%"), " [",
                    paste0(formatC(round(diversity_cm$overall[3], 3)*100, digits = 1, format = "f"), "%"),", ",
                    paste0(formatC(round(diversity_cm$overall[4], 3)*100, digits = 1, format = "f"), "%"),"]"),
    cex = 0.75, adj = 0)

text(
  y = 3,
  x = 1.5,
  paste0("Low Diversity ACC = ", round(diversity_epi$balanced.accuracy[1]*100, 1), "%"),
    cex = 0.75, adj = 0, col = "#3A001E")
text(
  y = 2.7,
  x = 1.5,
  paste0("Low Diversity Sens. = ", round(diversity_epi$precision[1]*100, 1), "%"),
    cex = 0.75, adj = 0, col = "#3A001E")
text(
  y = 2.4,
  x = 1.5,
  paste0("Low Diversity Spec. = ", round(diversity_epi$specificity[1]*100, 1), "%"),
    cex = 0.75, adj = 0, col = "#3A001E")
# text(
#   y = 2.1,
#   x = 1.5,
#   paste0("Low Diversity OR = ", round(diversity_epi$DOR[1], 1)),
#     cex = 0.75, adj = 0, col = "#3A001E")

text(
  y = -5,
  x = 1.5,
  paste0("Medium Diversity ACC = ", round(diversity_epi$balanced.accuracy[2]*100, 1), "%"),
    cex = 0.75, adj = 0, col = "#8A0246")
text(
  y = -5.3,
  x = 1.5,
  paste0("Medium Diversity Sens. = ", round(diversity_epi$precision[2]*100, 1), "%"),
    cex = 0.75, adj = 0, col = "#8A0246")
text(
  y = -5.6,
  x = 1.5,
  paste0("Medium Diversity Spec. = ", round(diversity_epi$specificity[2]*100, 1), "%"),
    cex = 0.75, adj = 0, col = "#8A0246")
# text(
#   y = -5.9,
#   x = 1.5,
#   paste0("Medium Diversity OR = ", round(diversity_epi$DOR[2], 1)),
#     cex = 0.75, adj = 0, col = "#8A0246")

text(
  y = 3,
  x = min(diversity_train_splsda_final$variates$X[,1])*1.05,
  paste0("High Diversity ACC = ", round(diversity_epi$balanced.accuracy[3]*100, 1), "%"),
    cex = 0.75, adj = 0, col = "#C20463")
text(
  y = 2.7,
  x = min(diversity_train_splsda_final$variates$X[,1])*1.05,
  paste0("High Diversity Sens. = ", round(diversity_epi$precision[3]*100, 1), "%"),
    cex = 0.75, adj = 0, col = "#C20463")
text(
  y = 2.4,
  x = min(diversity_train_splsda_final$variates$X[,1])*1.05,
  paste0("High Diversity Spec. = ", round(diversity_epi$specificity[3]*100, 1), "%"),
    cex = 0.75, adj = 0, col = "#C20463")

# text(
#   y = 2.1,
#   x = min(diversity_train_splsda_final$variates$X[,1])*1.05,
#   paste0("High Diversity OR = ", round(diversity_epi$DOR[3], 1)),
#     cex = 0.75, adj = 0, col = "#C20463")

Figure 6B: sPLS-DA Any Bacterial Infection using Qualitative Metabolomics

infx_metab_mat <-
  metab_qual_anon %>%
  mutate(compound = ifelse(compound == "isovaleric-acid", "isovalerate", compound),
       compound = str_to_title(compound)) %>%
  filter(compound %in% heatmap_cmpds$compound|is.na(compound)) %>%
  ungroup() %>%
  right_join(peri_matrix_all %>% select(sampleID, bact_infection_present)) %>% 
  group_by(sampleID, compound, bact_infection_present) %>% 
  summarise(mvalue = mean(mvalue, na.rm = TRUE)) %>% 
  ungroup() %>%
  mutate_all(~replace(., is.nan(.), NA)) %>% 
  select(sampleID, compound, mvalue, bact_infection_present) %>% 
  drop_na(sampleID) %>% 
  pivot_wider(names_from = "compound", values_from = "mvalue") %>% 
  filter(sampleID != "") %>%
  mutate(bact_infection_present = ifelse(grepl(x = bact_infection_present, pattern = "No"), "No Infection", "Infection")) %>% 
  select(-bact_infection_present) %>% 
  column_to_rownames(var = "sampleID") %>% 
  select(-`NA`) %>% 
  filter_all(any_vars(!is.na(.)))

  
infx_metab_labs <-
  infx_metab_mat %>% 
  rownames_to_column(var = "sampleID") %>% 
  left_join(peri_matrix_all %>% select(sampleID, bact_infection_present)) %>% 
  mutate(bact_infection_present = ifelse(grepl(x = bact_infection_present, pattern = "No"), "No Infection", "Infection")) %>% 
  pull(bact_infection_present)

dim(infx_metab_mat) #108 93 (means 93 compounds and 108 LT patients/infections)
## [1] 108  93
length(infx_metab_labs) #108 (means 108 LT patients/infections)
## [1] 108
# Begin model training
set.seed(1234)
infx_train <- sample(1:nrow(infx_metab_mat), as.integer(0.7*nrow(infx_metab_mat))) # randomly select 70% samples in training
infx_test <- setdiff(1:nrow(infx_metab_mat), infx_train) # rest is part of the test set

# store matrices into training and test set:
infx_metab_mat.train <- infx_metab_mat[infx_train, ]
infx_metab_mat.test <- infx_metab_mat[infx_test,]
infx_metab_labs.train <- infx_metab_labs[infx_train]
infx_metab_labs.test <- infx_metab_labs[infx_test]

# Train the model to tune hyperparameters
# Initial model to find optimal number of components to include
set.seed(1234)
infx_train_splsda <- mixOmics::splsda(infx_metab_mat.train, infx_metab_labs.train, ncomp = 5)

# Performance assessment
## 5-fold, 100-repeat cross validation
set.seed(1234)
infx_train_plsda_perf <-
  perf(
    infx_train_splsda,
    validation = "Mfold",
    folds = 5,
    progressBar = FALSE,
    auc = TRUE,
    nrepeat = 50
  )

plot(
  infx_train_plsda_perf,
  col = color.mixo(5:7),
  sd = FALSE,
  auc = TRUE,
  legend.position = "horizontal"
) # ncomp = 1 is best for classification error rate and max.dist

# Number of optimal variables to select for each component
infx_train_keepX <- c(1:10,  seq(20, 108, 10))

set.seed(123)
infx_train_tune_splsda <-
  mixOmics::tune.splsda(
    infx_metab_mat.train,
    infx_metab_labs.train,
    ncomp = 3, # Choose 3 components (max) to be safe
    validation = 'Mfold',
    folds = 5,
    dist = 'max.dist',
    progressBar = FALSE,
    auc = TRUE,
    measure = "BER",
    test.keepX = infx_train_keepX,
    nrepeat = 50
  )

plot(infx_train_tune_splsda, col = color.jet(3))

infx_train_error <- infx_train_tune_splsda$error.rate
infx_train_ncomp <- infx_train_tune_splsda$choice.ncomp$ncomp # optimal number of components based on t-tests on the error rate
infx_train_ncomp #1 component is optimal
## [1] 1
infx_train_select_keepX <- infx_train_tune_splsda$choice.keepX[1:(infx_train_ncomp + 1)]  # optimal number of variables to select per component
infx_train_select_keepX
## comp1 comp2 
##    90    80
# Final Model
infx_train_splsda_final <-
  mixOmics::splsda(infx_metab_mat.train, infx_metab_labs.train, ncomp = (infx_train_ncomp + 1), keepX = infx_train_select_keepX)

# Test the model
infx_predict_train_splsda_final <- predict(infx_train_splsda_final, infx_metab_mat.test, 
                                dist = "max.dist")

infx_predict_train_comp2 <- infx_predict_train_splsda_final$class$max.dist[,(infx_train_ncomp + 1)]

infx_union <- union(infx_predict_train_comp2, infx_metab_labs.test)

confusionMatrix(table(factor(infx_predict_train_comp2, infx_union),
                      factor(infx_metab_labs.test, infx_union)),
                 positive = "Infection")
## Confusion Matrix and Statistics
## 
##               
##                No Infection Infection
##   No Infection           20         5
##   Infection               7         1
##                                          
##                Accuracy : 0.6364         
##                  95% CI : (0.4512, 0.796)
##     No Information Rate : 0.8182         
##     P-Value [Acc > NIR] : 0.9965         
##                                          
##                   Kappa : -0.082         
##                                          
##  Mcnemar's Test P-Value : 0.7728         
##                                          
##             Sensitivity : 0.1667         
##             Specificity : 0.7407         
##          Pos Pred Value : 0.1250         
##          Neg Pred Value : 0.8000         
##              Prevalence : 0.1818         
##          Detection Rate : 0.0303         
##    Detection Prevalence : 0.2424         
##       Balanced Accuracy : 0.4537         
##                                          
##        'Positive' Class : Infection      
## 
infx_train_background <- background.predict(infx_train_splsda_final,
                                            comp.predicted = 2,
                                            xlim = c(-20,20),
                                            ylim = c(-20,20),
                                            dist = "centroids.dist") 

# Model metrics for all samples
infx_tot <- predict(infx_train_splsda_final,
                        infx_metab_mat,
                        dist = "max.dist")

infx_tot_predict <- infx_tot$class$max.dist[,(infx_train_ncomp + 1)]

infx_tot_union <- union(infx_tot_predict, infx_metab_labs)

infx_cm <- confusionMatrix(table(factor(infx_tot_predict, infx_tot_union,
                                        levels = c("Infection", "No Infection")),
                      factor(infx_metab_labs, infx_tot_union,
                             levels = c("Infection", "No Infection"))),
                      positive = "Infection")
infx_cm
## Confusion Matrix and Statistics
## 
##               
##                No Infection Infection
##   No Infection           17         9
##   Infection              10        72
##                                          
##                Accuracy : 0.8241         
##                  95% CI : (0.739, 0.8906)
##     No Information Rate : 0.75           
##     P-Value [Acc > NIR] : 0.04398        
##                                          
##                   Kappa : 0.525          
##                                          
##  Mcnemar's Test P-Value : 1.00000        
##                                          
##             Sensitivity : 0.8889         
##             Specificity : 0.6296         
##          Pos Pred Value : 0.8780         
##          Neg Pred Value : 0.6538         
##              Prevalence : 0.7500         
##          Detection Rate : 0.6667         
##    Detection Prevalence : 0.7593         
##       Balanced Accuracy : 0.7593         
##                                          
##        'Positive' Class : Infection      
## 
# Additional model measures
infx_epi <- epiR::epi.tests(table(infx_tot_predict, infx_metab_labs), conf.level = 0.95)

infx_confusion_df <- infx_epi$tab %>% 
  t() %>% 
  as.data.frame() %>% 
  rownames_to_column(var = "actual") %>% 
  mutate(actual = case_when(grepl(actual, pattern = "+", fixed = TRUE) ~ "Actual\nInfection",
                            grepl(actual, pattern = "-", fixed = TRUE) ~ "Actual\nNo Infection",
                            TRUE ~ "Total")) %>% 
  dplyr::rename("Predicted\nInfection" = "Test +",
                "Predicted\nNo Infection" = "Test -") %>% 
  column_to_rownames(var = "actual")

{
pdf(file = "./Results/Figure_6B.pdf", height = 10, width = 10)
plotIndiv(
  infx_train_splsda_final,
  comp = c(1,2),
  pch = 1,
  ind.names = FALSE,
  legend = FALSE,
  background = infx_train_background,
  col = c("goldenrod", "gray75"),
  star = TRUE,
  point.lwd = 0.5,
  title = NULL,
  size.title = 0.00001,
  style = "graphics",
  legend.title = "Infection Group",
  X.label = paste0("Component 1 (", round(infx_train_splsda_final$prop_expl_var$X[1] * 100), "%)"),
  Y.label = paste0("Component 2 (", round(infx_train_splsda_final$prop_expl_var$X[2] * 100), "%)")
) 
addtable2plot(
  y = max(infx_train_splsda_final$variates$X[,2])*0.1,
  x = min(infx_train_splsda_final$variates$X[,1]),
  infx_confusion_df,
  bty = "o",
  display.rownames = TRUE,
  hlines = TRUE,
  vlines = TRUE,
  cex = 0.75,
  bg = "white"
)
text(
  y = -4,
  x = -2.75,
  substitute(bold("Infection")), cex = 1.75, adj = 0, col = "goldenrod")
text(
  y = 4,
  x = -1,
  substitute(bold("No Infection")), cex = 1.75, adj = 0, col = "gray70")
text(
  y = max(infx_train_splsda_final$variates$X[,2])*1,
  x = min(infx_train_splsda_final$variates$X[,1]),
  paste0("ACC = ", 
                    round(infx_cm$overall[1]*100, 1), "% [",
                    round(infx_cm$overall[3]*100, 1), "%, ",
                    round(infx_cm$overall[4]*100, 1), "%]"),
    cex = 0.75, adj = 0)
text(
  y = max(infx_train_splsda_final$variates$X[,2])*0.9,
  x = min(infx_train_splsda_final$variates$X[,1]),
  paste0("Sens. = ", 
                    paste0(formatC(round(infx_epi$detail[2][3,], 3)*100, digits = 1, format = "f"), "%"), " [",
                    paste0(formatC(round(infx_epi$detail[3][3,], 3)*100, digits = 1, format = "f"), "%"),", ",
                    paste0(formatC(round(infx_epi$detail[4][3,], 3)*100, digits = 1, format = "f"), "%"),"]"),
    cex = 0.75, adj = 0)
text(
  y = max(infx_train_splsda_final$variates$X[,2])*0.8,
  x = min(infx_train_splsda_final$variates$X[,1]),
  paste0("Spec. = ", 
                    paste0(formatC(round(infx_epi$detail[2][4,], 3)*100, digits = 1, format = "f"), "%"), " [",
                    paste0(formatC(round(infx_epi$detail[3][4,], 3)*100, digits = 1, format = "f"), "%"),", ",
                    paste0(formatC(round(infx_epi$detail[4][4,], 3)*100, digits = 1, format = "f"), "%"),"]"),
    cex = 0.75, adj = 0)
text(
  y = max(infx_train_splsda_final$variates$X[,2])*0.7,
  x = min(infx_train_splsda_final$variates$X[,1]),
  paste0("OR = ",
                    formatC(round(infx_epi$detail[2][6,], 3), digits = 1, format = "f"),  " [",
                    formatC(round(infx_epi$detail[3][6,], 3), digits = 1, format = "f"), ", ",
                    formatC(round(infx_epi$detail[4][6,], 3), digits = 1, format = "f"), "]"),
    cex = 0.75, adj = 0)
invisible(dev.off())
}

 plotIndiv(
  infx_train_splsda_final,
  comp = c(1,2),
  pch = 1,
  ind.names = FALSE,
  legend = FALSE,
  background = infx_train_background,
  col = c("goldenrod", "gray87"),
  star = TRUE,
  point.lwd = 0.5,
  title = NULL,
  size.title = 0.00001,
  style = "graphics",
  legend.title = "Infection Group",
  X.label = paste0("Component 1 (", round(infx_train_splsda_final$prop_expl_var$X[1] * 100), "%)"),
  Y.label = paste0("Component 2 (", round(infx_train_splsda_final$prop_expl_var$X[2] * 100), "%)")
) 
addtable2plot(
  y = max(infx_train_splsda_final$variates$X[,2])*0.1,
  x = min(infx_train_splsda_final$variates$X[,1]),
  infx_confusion_df,
  bty = "o",
  display.rownames = TRUE,
  hlines = TRUE,
  vlines = TRUE,
  cex = 0.75,
  bg = "white"
)
text(
  y = -4,
  x = -2.75,
  substitute(bold("Infection")), cex = 1.75, adj = 0, col = "goldenrod")
text(
  y = 4,
  x = -1,
  substitute(bold("No Infection")), cex = 1.75, adj = 0, col = "gray70")
text(
  y = max(infx_train_splsda_final$variates$X[,2])*1,
  x = min(infx_train_splsda_final$variates$X[,1]),
  paste0("ACC = ", 
                    round(infx_cm$overall[1]*100, 1), "% [",
                    round(infx_cm$overall[3]*100, 1), "%, ",
                    round(infx_cm$overall[4]*100, 1), "%]"),
    cex = 0.75, adj = 0)
text(
  y = max(infx_train_splsda_final$variates$X[,2])*0.9,
  x = min(infx_train_splsda_final$variates$X[,1]),
  paste0("Sens. = ", 
                    paste0(formatC(round(infx_epi$detail[2][3,], 3)*100, digits = 1, format = "f"), "%"), " [",
                    paste0(formatC(round(infx_epi$detail[3][3,], 3)*100, digits = 1, format = "f"), "%"),", ",
                    paste0(formatC(round(infx_epi$detail[4][3,], 3)*100, digits = 1, format = "f"), "%"),"]"),
    cex = 0.75, adj = 0)
text(
  y = max(infx_train_splsda_final$variates$X[,2])*0.8,
  x = min(infx_train_splsda_final$variates$X[,1]),
  paste0("Spec. = ", 
                    paste0(formatC(round(infx_epi$detail[2][4,], 3)*100, digits = 1, format = "f"), "%"), " [",
                    paste0(formatC(round(infx_epi$detail[3][4,], 3)*100, digits = 1, format = "f"), "%"),", ",
                    paste0(formatC(round(infx_epi$detail[4][4,], 3)*100, digits = 1, format = "f"), "%"),"]"),
    cex = 0.75, adj = 0)
text(
  y = max(infx_train_splsda_final$variates$X[,2])*0.7,
  x = min(infx_train_splsda_final$variates$X[,1]),
  paste0("OR = ",
                    formatC(round(infx_epi$detail[2][6,], 3), digits = 1, format = "f"),  " [",
                    formatC(round(infx_epi$detail[3][6,], 3), digits = 1, format = "f"), ", ",
                    formatC(round(infx_epi$detail[4][6,], 3), digits = 1, format = "f"), "]"),
    cex = 0.75, adj = 0)

Combine Figure 6A + 6B: sPLS-DA Diversity Groups and Any Bacterial Infection

{
    pdf(file = "./Results/Figure_6.pdf", height = 10, width = 18)
    par(mfrow = c(1, 2))

    # Figure 6A
    plotIndiv(diversity_train_splsda_final, xlim = c(min(diversity_train_splsda_final$variates$X[,
        1]) * 1.05, max(diversity_train_splsda_final$variates$X[,
        1]) * 1.8), ylim = c(min(diversity_train_splsda_final$variates$X[,
        2]) * 1.15, max(diversity_train_splsda_final$variates$X[,
        2]) * 1.8), comp = c(1, 2), pch = 1, ind.names = FALSE,
        legend = FALSE, background = diversity_train_background,
        col = c("#3A001E", "#8A0246", "#C20463"), star = TRUE,
        point.lwd = 0.5, title = NULL, size.title = 1e-05, style = "graphics",
        legend.title = "Diversity Group", X.label = paste0("Component 1 (",
            round(diversity_train_splsda_final$prop_expl_var$X[1] *
                100), "%)"), Y.label = paste0("Component 2 (",
            round(diversity_train_splsda_final$prop_expl_var$X[2] *
                100), "%)"))
    # addtable2plot( y = -4.68, x =
    # min(diversity_train_splsda_final$variates$X[,1])*1.1,
    # diversity_confusion_df, bty = 'o', display.rownames =
    # TRUE, hlines = TRUE, vlines = TRUE, cex = 1.35, bg =
    # 'white' )
    text(y = -6.5, x = min(diversity_train_splsda_final$variates$X[,
        1]) * 1.1, paste0("Overall ACC = ", paste0(formatC(round(diversity_cm$overall[1],
        3) * 100, digits = 1, format = "f"), "%"), " [", paste0(formatC(round(diversity_cm$overall[3],
        3) * 100, digits = 1, format = "f"), "%"), ", ", paste0(formatC(round(diversity_cm$overall[4],
        3) * 100, digits = 1, format = "f"), "%"), "]"), cex = 1.35,
        adj = 0)

    text(y = 3, x = 0.5, paste0("Low Diversity ACC = ", round(diversity_epi$balanced.accuracy[1] *
        100, 1), "%"), cex = 1.35, adj = 0, col = "#3A001E")
    text(y = 2.7, x = 0.5, paste0("Low Diversity Sens. = ", round(diversity_epi$precision[1] *
        100, 1), "%"), cex = 1.35, adj = 0, col = "#3A001E")
    text(y = 2.4, x = 0.5, paste0("Low Diversity Spec. = ", round(diversity_epi$specificity[1] *
        100, 1), "%"), cex = 1.35, adj = 0, col = "#3A001E")
    # text( y = 2.1, x = 0.5, paste0('Low Diversity OR = ',
    # round(diversity_epi$DOR[1], 1)), cex = 1.35, adj = 0,
    # col = '#3A001E')

    text(y = -5.5, x = 0, paste0("Medium Diversity ACC = ", round(diversity_epi$balanced.accuracy[2] *
        100, 1), "%"), cex = 1.35, adj = 0, col = "#8A0246")
    text(y = -5.8, x = 0, paste0("Medium Diversity Sens. = ",
        round(diversity_epi$precision[2] * 100, 1), "%"), cex = 1.35,
        adj = 0, col = "#8A0246")
    text(y = -6.1, x = 0, paste0("Medium Diversity Spec. = ",
        round(diversity_epi$specificity[2] * 100, 1), "%"), cex = 1.35,
        adj = 0, col = "#8A0246")
    # text( y = -7.4, x = 0, paste0('Medium Diversity OR =
    # ', round(diversity_epi$DOR[2], 1)), cex = 1.05, adj =
    # 0, col = '#8A0246')

    text(y = 3, x = min(diversity_train_splsda_final$variates$X[,
        1]) * 1.05, paste0("High Diversity ACC = ", round(diversity_epi$balanced.accuracy[3] *
        100, 1), "%"), cex = 1.35, adj = 0, col = "#C20463")
    text(y = 2.7, x = min(diversity_train_splsda_final$variates$X[,
        1]) * 1.05, paste0("High Diversity Sens. = ", round(diversity_epi$precision[3] *
        100, 1), "%"), cex = 1.35, adj = 0, col = "#C20463")
    text(y = 2.4, x = min(diversity_train_splsda_final$variates$X[,
        1]) * 1.05, paste0("High Diversity Spec. = ", round(diversity_epi$specificity[3] *
        100, 1), "%"), cex = 1.35, adj = 0, col = "#C20463")
    # text( y = 2.1, x =
    # min(diversity_train_splsda_final$variates$X[,1])*1.05,
    # paste0('High Diversity OR = ',
    # round(diversity_epi$DOR[3], 1)), cex = 1.35, adj = 0,
    # col = '#C20463')

    # Figure 6B
    plotIndiv(infx_train_splsda_final, comp = c(1, 2), pch = 1,
        ind.names = FALSE, legend = FALSE, background = infx_train_background,
        col = c("goldenrod", "gray75"), star = TRUE, point.lwd = 0.5,
        title = NULL, size.title = 1e-05, style = "graphics",
        legend.title = "Infection Group", X.label = paste0("Component 1 (",
            round(infx_train_splsda_final$prop_expl_var$X[1] *
                100), "%)"), Y.label = paste0("Component 2 (",
            round(infx_train_splsda_final$prop_expl_var$X[2] *
                100), "%)"))
    # addtable2plot( y =
    # max(infx_train_splsda_final$variates$X[,2])*0.1, x =
    # min(infx_train_splsda_final$variates$X[,1]),
    # infx_confusion_df[,1:2], bty = 'o', display.rownames
    # = TRUE, hlines = TRUE, vlines = TRUE, cex = 1.35, bg
    # = 'white' )
    text(y = -4, x = -2.75, substitute(bold("Infection")), cex = 1.75,
        adj = 0, col = "goldenrod")
    text(y = 4, x = -1, substitute(bold("No Infection")), cex = 1.75,
        adj = 0, col = "gray70")
    text(y = max(infx_train_splsda_final$variates$X[, 2]) * 1,
        x = min(infx_train_splsda_final$variates$X[, 1]), paste0("ACC = ",
            round(infx_cm$overall[1] * 100, 1), "% [", round(infx_cm$overall[3] *
                100, 1), "%, ", round(infx_cm$overall[4] * 100,
                1), "%]"), cex = 1.35, adj = 0)
    text(y = max(infx_train_splsda_final$variates$X[, 2]) * 0.9,
        x = min(infx_train_splsda_final$variates$X[, 1]), paste0("Sens. = ",
            paste0(formatC(round(infx_epi$detail[2][3, ], 3) *
                100, digits = 1, format = "f"), "%"), " [", paste0(formatC(round(infx_epi$detail[3][3,
                ], 3) * 100, digits = 1, format = "f"), "%"),
            ", ", paste0(formatC(round(infx_epi$detail[4][3,
                ], 3) * 100, digits = 1, format = "f"), "%"),
            "]"), cex = 1.35, adj = 0)
    text(y = max(infx_train_splsda_final$variates$X[, 2]) * 0.8,
        x = min(infx_train_splsda_final$variates$X[, 1]), paste0("Spec. = ",
            paste0(formatC(round(infx_epi$detail[2][4, ], 3) *
                100, digits = 1, format = "f"), "%"), " [", paste0(formatC(round(infx_epi$detail[3][4,
                ], 3) * 100, digits = 1, format = "f"), "%"),
            ", ", paste0(formatC(round(infx_epi$detail[4][4,
                ], 3) * 100, digits = 1, format = "f"), "%"),
            "]"), cex = 1.35, adj = 0)
    text(y = max(infx_train_splsda_final$variates$X[, 2]) * 0.7,
        x = min(infx_train_splsda_final$variates$X[, 1]), paste0("OR = ",
            formatC(round(infx_epi$detail[2][6, ], 3), digits = 1,
                format = "f"), " [", formatC(round(infx_epi$detail[3][6,
                ], 3), digits = 1, format = "f"), ", ", formatC(round(infx_epi$detail[4][6,
                ], 3), digits = 1, format = "f"), "]"), cex = 1.35,
        adj = 0)

    invisible(dev.off())
}


# Figure 6A
par(mfrow = c(1, 2))

plotIndiv(diversity_train_splsda_final, xlim = c(min(diversity_train_splsda_final$variates$X[,
    1]) * 1.05, max(diversity_train_splsda_final$variates$X[,
    1]) * 1.8), ylim = c(min(diversity_train_splsda_final$variates$X[,
    2]) * 1.15, max(diversity_train_splsda_final$variates$X[,
    2]) * 1.8), comp = c(1, 2), pch = 1, ind.names = FALSE, legend = FALSE,
    background = diversity_train_background, col = c("#3A001E",
        "#8A0246", "#C20463"), star = TRUE, point.lwd = 0.5,
    title = NULL, size.title = 1e-05, style = "graphics", legend.title = "Diversity Group",
    X.label = paste0("Component 1 (", round(diversity_train_splsda_final$prop_expl_var$X[1] *
        100), "%)"), Y.label = paste0("Component 2 (", round(diversity_train_splsda_final$prop_expl_var$X[2] *
        100), "%)"))
addtable2plot(y = -4.68, x = min(diversity_train_splsda_final$variates$X[,
    1]) * 1.1, diversity_confusion_df, bty = "o", display.rownames = TRUE,
    hlines = TRUE, vlines = TRUE, cex = 0.75, bg = "white")
text(y = -6.5, x = min(diversity_train_splsda_final$variates$X[,
    1]) * 1.1, paste0("Overall ACC = ", paste0(formatC(round(diversity_cm$overall[1],
    3) * 100, digits = 1, format = "f"), "%"), " [", paste0(formatC(round(diversity_cm$overall[3],
    3) * 100, digits = 1, format = "f"), "%"), ", ", paste0(formatC(round(diversity_cm$overall[4],
    3) * 100, digits = 1, format = "f"), "%"), "]"), cex = 1.05,
    adj = 0)

text(y = 3, x = 0.5, paste0("Low Diversity ACC = ", round(diversity_epi$balanced.accuracy[1] *
    100, 1), "%"), cex = 1.05, adj = 0, col = "#3A001E")
text(y = 2.7, x = 0.5, paste0("Low Diversity Sens. = ", round(diversity_epi$precision[1] *
    100, 1), "%"), cex = 1.05, adj = 0, col = "#3A001E")
text(y = 2.4, x = 0.5, paste0("Low Diversity Spec. = ", round(diversity_epi$specificity[1] *
    100, 1), "%"), cex = 1.05, adj = 0, col = "#3A001E")
# text( y = 2.1, x = 0.5, paste0('Low Diversity OR = ',
# round(diversity_epi$DOR[1], 1)), cex = 1.05, adj = 0, col
# = '#3A001E')

text(y = -5.5, x = 0, paste0("Medium Diversity ACC = ", round(diversity_epi$balanced.accuracy[2] *
    100, 1), "%"), cex = 1.05, adj = 0, col = "#8A0246")
text(y = -5.8, x = 0, paste0("Medium Diversity Sens. = ", round(diversity_epi$precision[2] *
    100, 1), "%"), cex = 1.05, adj = 0, col = "#8A0246")
text(y = -6.1, x = 0, paste0("Medium Diversity Spec. = ", round(diversity_epi$specificity[2] *
    100, 1), "%"), cex = 1.05, adj = 0, col = "#8A0246")
# text( y = -7.4, x = 0, paste0('Medium Diversity OR = ',
# round(diversity_epi$DOR[2], 1)), cex = 1.05, adj = 0, col
# = '#8A0246')

text(y = 3, x = min(diversity_train_splsda_final$variates$X[,
    1]) * 1.05, paste0("High Diversity ACC = ", round(diversity_epi$balanced.accuracy[3] *
    100, 1), "%"), cex = 1.05, adj = 0, col = "#C20463")
text(y = 2.7, x = min(diversity_train_splsda_final$variates$X[,
    1]) * 1.05, paste0("High Diversity Sens. = ", round(diversity_epi$precision[3] *
    100, 1), "%"), cex = 1.05, adj = 0, col = "#C20463")
text(y = 2.4, x = min(diversity_train_splsda_final$variates$X[,
    1]) * 1.05, paste0("High Diversity Spec. = ", round(diversity_epi$specificity[3] *
    100, 1), "%"), cex = 1.05, adj = 0, col = "#C20463")
# text( y = 2.1, x =
# min(diversity_train_splsda_final$variates$X[,1])*1.05,
# paste0('High Diversity OR = ',
# round(diversity_epi$DOR[3], 1)), cex = 1.05, adj = 0, col
# = '#C20463')

# Figure 6B
plotIndiv(infx_train_splsda_final, comp = c(1, 2), pch = 1, ind.names = FALSE,
    legend = FALSE, background = infx_train_background, col = c("goldenrod",
        "gray75"), star = TRUE, point.lwd = 0.5, title = NULL,
    size.title = 1e-05, style = "graphics", legend.title = "Infection Group",
    X.label = paste0("Component 1 (", round(infx_train_splsda_final$prop_expl_var$X[1] *
        100), "%)"), Y.label = paste0("Component 2 (", round(infx_train_splsda_final$prop_expl_var$X[2] *
        100), "%)"))
addtable2plot(y = max(infx_train_splsda_final$variates$X[, 2]) *
    0.1, x = min(infx_train_splsda_final$variates$X[, 1]), infx_confusion_df[,
    1:2], bty = "o", display.rownames = TRUE, hlines = TRUE,
    vlines = TRUE, cex = 0.75, bg = "white")
text(y = -4, x = -2.75, substitute(bold("Infection")), cex = 1.75,
    adj = 0, col = "goldenrod")
text(y = 4, x = -1, substitute(bold("No Infection")), cex = 1.75,
    adj = 0, col = "gray70")
text(y = max(infx_train_splsda_final$variates$X[, 2]) * 1, x = min(infx_train_splsda_final$variates$X[,
    1]), paste0("ACC = ", round(infx_cm$overall[1] * 100, 1),
    "% [", round(infx_cm$overall[3] * 100, 1), "%, ", round(infx_cm$overall[4] *
        100, 1), "%]"), cex = 1.05, adj = 0)
text(y = max(infx_train_splsda_final$variates$X[, 2]) * 0.9,
    x = min(infx_train_splsda_final$variates$X[, 1]), paste0("Sens. = ",
        paste0(formatC(round(infx_epi$detail[2][3, ], 3) * 100,
            digits = 1, format = "f"), "%"), " [", paste0(formatC(round(infx_epi$detail[3][3,
            ], 3) * 100, digits = 1, format = "f"), "%"), ", ",
        paste0(formatC(round(infx_epi$detail[4][3, ], 3) * 100,
            digits = 1, format = "f"), "%"), "]"), cex = 1.05,
    adj = 0)
text(y = max(infx_train_splsda_final$variates$X[, 2]) * 0.8,
    x = min(infx_train_splsda_final$variates$X[, 1]), paste0("Spec. = ",
        paste0(formatC(round(infx_epi$detail[2][4, ], 3) * 100,
            digits = 1, format = "f"), "%"), " [", paste0(formatC(round(infx_epi$detail[3][4,
            ], 3) * 100, digits = 1, format = "f"), "%"), ", ",
        paste0(formatC(round(infx_epi$detail[4][4, ], 3) * 100,
            digits = 1, format = "f"), "%"), "]"), cex = 1.05,
    adj = 0)
text(y = max(infx_train_splsda_final$variates$X[, 2]) * 0.7,
    x = min(infx_train_splsda_final$variates$X[, 1]), paste0("OR = ",
        formatC(round(infx_epi$detail[2][6, ], 3), digits = 1,
            format = "f"), " [", formatC(round(infx_epi$detail[3][6,
            ], 3), digits = 1, format = "f"), ", ", formatC(round(infx_epi$detail[4][6,
            ], 3), digits = 1, format = "f"), "]"), cex = 1.05,
    adj = 0)

Supplemental Figure 2A + 2B: Plot Loadings for Diversity Groups

# Take absolute value to make diversity group loadings plot
# narrower
diversity_train_splsda_final$loadings$X <- abs(diversity_train_splsda_final$loadings$X)

# Combine Supplemental Figures 2A + 2B

{
    pdf(file = "./Results/Supplemental_Figure_2.pdf", height = 8,
        width = 11)

    par(mfrow = c(1, 3))

    # Supplmental Figure 2A
    plotLoadings(diversity_train_splsda_final, contrib = "max",
        method = "mean", comp = 1, legend = FALSE, legend.col = c("#EDE342",
            "#F69A97", "#FF51EB"), size.name = 1.1, size.title = rel(1),
        ndisplay = 50)

    # Supplmental Figure 2B
    plotLoadings(diversity_train_splsda_final, contrib = "max",
        method = "mean", comp = 2, legend.col = c("#EDE342",
            "#F69A97", "#FF51EB"), size.name = 1.1, size.title = rel(1),
        ndisplay = 50)

    invisible(dev.off())
}

par(mfrow = c(1, 3))

# Supplmental Figure 2A
plotLoadings(diversity_train_splsda_final, contrib = "max", method = "mean",
    comp = 1, legend = FALSE, legend.col = c("#EDE342", "#F69A97",
        "#FF51EB"), size.name = 0.6, size.title = rel(1), ndisplay = 50)

# Supplmental Figure 2B
plotLoadings(diversity_train_splsda_final, contrib = "max", method = "mean",
    comp = 2, legend.col = c("#EDE342", "#F69A97", "#FF51EB"),
    size.name = 0.6, size.title = rel(1), ndisplay = 50)

Supplemental Figure 3A + 3B: Plot Loadings for Any Bacterial Infection

# Take absolute value to make diversity group loadings plot
# narrower
infx_train_splsda_final$loadings$X <- abs(infx_train_splsda_final$loadings$X)

# Combine Supplemental Figures 2A + 2B

{
    pdf(file = "./Results/Supplemental_Figure_3.pdf", height = 8,
        width = 11)

    par(mfrow = c(1, 3))

    # Supplmental Figure 3A
    plotLoadings(infx_train_splsda_final, contrib = "max", method = "mean",
        comp = 1, legend = FALSE, legend.col = c("goldenrod",
            "gray75"), size.name = 1.1, size.title = rel(1),
        ndisplay = 50)

    # Supplmental Figure 3B
    plotLoadings(infx_train_splsda_final, contrib = "max", method = "mean",
        comp = 2, legend.col = c("goldenrod", "gray75"), size.name = 1.1,
        size.title = rel(1), ndisplay = 50)

    invisible(dev.off())
}

par(mfrow = c(1, 3))

# Supplmental Figure 3A
plotLoadings(infx_train_splsda_final, contrib = "max", method = "mean",
    comp = 1, legend = FALSE, legend.col = c("goldenrod", "gray75"),
    size.name = 0.6, size.title = rel(1), ndisplay = 50)

# Supplmental Figure 3B
plotLoadings(infx_train_splsda_final, contrib = "max", method = "mean",
    comp = 2, legend.col = c("goldenrod", "gray75"), size.name = 0.6,
    size.title = rel(1), ndisplay = 50)

Clinical Tables

Supplemental Figure 1: Flow Diagram

flow_chart <- flow_exclusions(incl_counts = c(158, 130, 107,
    25), total_label = "Total Patients Enrolled", incl_labels = c("Patients w/ Transplant",
    "Patients Included", "Patients w/ Bacterial Infection"),
    excl_labels = c("No Transplant", "No Sample In Study Period\nDay -7 to +30",
        "Patients w/o Bacterial Infection"), show_count = TRUE)

flow_chart
flow_chart %>%
    export_svg() %>%
    charToRaw() %>%
    rsvg_pdf("./Results/Supplemental_Figure_1.pdf")

Demographics

## Vector of variables to summarize
demo_vars <- c("race", "sex", "age", "meld_transplant", "Alcoholic Hepatitis",
    "Alcoholic Cirrhosis", "NAFLD/NASH", "Primary Sclerosing Cholangitis",
    "Acute Viral Hepatitis", "Chronic Hepatitis B", "Chronic Hepatitis C",
    "Autoimmune", "Wilson's Disease", "Alpha-1 Antitrypsin",
    "Hemachromatosis", "Drug Induced Liver Injury or Toxin",
    "Budd Chiari", "Cryptogenic", "Malignancy", "Other", "Dialysis",
    "Pressers", "Mechanical Ventilation")
## Vector of categorical variables that need transformation
demo_cats <- c("race", "sex", "Alcoholic Hepatitis", "Alcoholic Cirrhosis",
    "NAFLD/NASH", "Primary Sclerosing Cholangitis", "Acute Viral Hepatitis",
    "Chronic Hepatitis B", "Chronic Hepatitis C", "Autoimmune",
    "Wilson's Disease", "Alpha-1 Antitrypsin", "Hemachromatosis",
    "Drug Induced Liver Injury or Toxin", "Budd Chiari", "Cryptogenic",
    "Malignancy", "Other", "Dialysis", "Pressers", "Mechanical Ventilation")


tab1_1 <- CreateTableOne(vars = demo_vars, testNonNormal = "wilcox.test",
    includeNA = TRUE, factorVars = demo_cats, strata = "any_infection",
    data = demo)

summary(tab1_1)  # Age is potentially skewed, need to state that it is skewed and re-run `CreateTableOne`
## 
##      ### Summary of continuous variables ###
## 
## any_infection: 0
##                  n miss p.miss mean sd median p25 p75 min max  skew kurt
## age             82    0      0   51 17     55  40  63   2  77 -1.00  0.8
## meld_transplant 82    0      0   26 10     28  19  33   6  49 -0.04 -0.7
## ------------------------------------------------------------ 
## any_infection: 1
##                  n miss p.miss mean sd median p25 p75 min max skew   kurt
## age             25    0      0   56 15     59  46  68  22  73 -0.9  0.002
## meld_transplant 25    0      0   27 10     30  19  33  11  42 -0.3 -1.057
## 
## p-values
##                   pNormal pNonNormal
## age             0.2072738  0.1765252
## meld_transplant 0.6624494  0.6085202
## 
## Standardize mean differences
##                    1 vs 2
## age             0.2976093
## meld_transplant 0.1025141
## 
## =======================================================================================
## 
##      ### Summary of categorical variables ### 
## 
## any_infection: 0
##                                 var  n miss p.miss
##                                race 82    0    0.0
##                                                   
##                                                   
##                                                   
##                                                   
##                                                   
##                                                   
##                                                   
##                                 sex 82    0    0.0
##                                                   
##                                                   
##                 Alcoholic Hepatitis 82    0    0.0
##                                                   
##                                                   
##                 Alcoholic Cirrhosis 82    0    0.0
##                                                   
##                                                   
##                          NAFLD/NASH 82    0    0.0
##                                                   
##                                                   
##      Primary Sclerosing Cholangitis 82    0    0.0
##                                                   
##                                                   
##               Acute Viral Hepatitis 82    0    0.0
##                                                   
##                                                   
##                 Chronic Hepatitis B 82    0    0.0
##                                                   
##                                                   
##                 Chronic Hepatitis C 82    0    0.0
##                                                   
##                                                   
##                          Autoimmune 82    0    0.0
##                                                   
##                                                   
##                    Wilson's Disease 82    0    0.0
##                                                   
##                                                   
##                 Alpha-1 Antitrypsin 82    0    0.0
##                                                   
##                     Hemachromatosis 82    0    0.0
##                                                   
##                                                   
##  Drug Induced Liver Injury or Toxin 82    0    0.0
##                                                   
##                                                   
##                         Budd Chiari 82    0    0.0
##                                                   
##                         Cryptogenic 82    0    0.0
##                                                   
##                                                   
##                          Malignancy 82    0    0.0
##                                                   
##                                                   
##                               Other 82    0    0.0
##                                                   
##                                                   
##                            Dialysis 82    0    0.0
##                                                   
##                                                   
##                            Pressers 82    0    0.0
##                                                   
##                                                   
##              Mechanical Ventilation 82    0    0.0
##                                                   
##                                                   
##                             level freq percent cum.percent
##  American Indian or Alaska Native    1     1.2         1.2
##              Asian/Mideast Indian    6     7.3         8.5
##            Black/African-American    9    11.0        19.5
##                More than one Race    8     9.8        29.3
##                  Patient Declined    4     4.9        34.1
##                           Unknown    0     0.0        34.1
##                             White   54    65.9       100.0
##                                                           
##                            Female   38    46.3        46.3
##                              Male   44    53.7       100.0
##                                                           
##                                 0   76    92.7        92.7
##                                 1    6     7.3       100.0
##                                                           
##                                 0   42    51.2        51.2
##                                 1   40    48.8       100.0
##                                                           
##                                 0   72    87.8        87.8
##                                 1   10    12.2       100.0
##                                                           
##                                 0   76    92.7        92.7
##                                 1    6     7.3       100.0
##                                                           
##                                 0   78    95.1        95.1
##                                 1    4     4.9       100.0
##                                                           
##                                 0   82   100.0       100.0
##                                 1    0     0.0       100.0
##                                                           
##                                 0   78    95.1        95.1
##                                 1    4     4.9       100.0
##                                                           
##                                 0   77    93.9        93.9
##                                 1    5     6.1       100.0
##                                                           
##                                 0   80    97.6        97.6
##                                 1    2     2.4       100.0
##                                                           
##                                 0   82   100.0       100.0
##                                                           
##                                 0   82   100.0       100.0
##                                 1    0     0.0       100.0
##                                                           
##                                 0   81    98.8        98.8
##                                 1    1     1.2       100.0
##                                                           
##                                 0   82   100.0       100.0
##                                                           
##                                 0   78    95.1        95.1
##                                 1    4     4.9       100.0
##                                                           
##                                 0   65    79.3        79.3
##                                 1   17    20.7       100.0
##                                                           
##                                 0   74    90.2        90.2
##                                 1    8     9.8       100.0
##                                                           
##                                 0   60    73.2        73.2
##                                 1   22    26.8       100.0
##                                                           
##                                 0   76    92.7        92.7
##                                 1    6     7.3       100.0
##                                                           
##                                 0   77    93.9        93.9
##                                 1    5     6.1       100.0
##                                                           
## ------------------------------------------------------------ 
## any_infection: 1
##                                 var  n miss p.miss
##                                race 25    0    0.0
##                                                   
##                                                   
##                                                   
##                                                   
##                                                   
##                                                   
##                                                   
##                                 sex 25    0    0.0
##                                                   
##                                                   
##                 Alcoholic Hepatitis 25    0    0.0
##                                                   
##                                                   
##                 Alcoholic Cirrhosis 25    0    0.0
##                                                   
##                                                   
##                          NAFLD/NASH 25    0    0.0
##                                                   
##                                                   
##      Primary Sclerosing Cholangitis 25    0    0.0
##                                                   
##                                                   
##               Acute Viral Hepatitis 25    0    0.0
##                                                   
##                                                   
##                 Chronic Hepatitis B 25    0    0.0
##                                                   
##                                                   
##                 Chronic Hepatitis C 25    0    0.0
##                                                   
##                                                   
##                          Autoimmune 25    0    0.0
##                                                   
##                                                   
##                    Wilson's Disease 25    0    0.0
##                                                   
##                                                   
##                 Alpha-1 Antitrypsin 25    0    0.0
##                                                   
##                     Hemachromatosis 25    0    0.0
##                                                   
##                                                   
##  Drug Induced Liver Injury or Toxin 25    0    0.0
##                                                   
##                                                   
##                         Budd Chiari 25    0    0.0
##                                                   
##                         Cryptogenic 25    0    0.0
##                                                   
##                                                   
##                          Malignancy 25    0    0.0
##                                                   
##                                                   
##                               Other 25    0    0.0
##                                                   
##                                                   
##                            Dialysis 25    0    0.0
##                                                   
##                                                   
##                            Pressers 25    0    0.0
##                                                   
##                                                   
##              Mechanical Ventilation 25    0    0.0
##                                                   
##                                                   
##                             level freq percent cum.percent
##  American Indian or Alaska Native    0     0.0         0.0
##              Asian/Mideast Indian    2     8.0         8.0
##            Black/African-American    2     8.0        16.0
##                More than one Race    2     8.0        24.0
##                  Patient Declined    1     4.0        28.0
##                           Unknown    2     8.0        36.0
##                             White   16    64.0       100.0
##                                                           
##                            Female    9    36.0        36.0
##                              Male   16    64.0       100.0
##                                                           
##                                 0   23    92.0        92.0
##                                 1    2     8.0       100.0
##                                                           
##                                 0   17    68.0        68.0
##                                 1    8    32.0       100.0
##                                                           
##                                 0   19    76.0        76.0
##                                 1    6    24.0       100.0
##                                                           
##                                 0   25   100.0       100.0
##                                 1    0     0.0       100.0
##                                                           
##                                 0   25   100.0       100.0
##                                 1    0     0.0       100.0
##                                                           
##                                 0   24    96.0        96.0
##                                 1    1     4.0       100.0
##                                                           
##                                 0   25   100.0       100.0
##                                 1    0     0.0       100.0
##                                                           
##                                 0   25   100.0       100.0
##                                 1    0     0.0       100.0
##                                                           
##                                 0   24    96.0        96.0
##                                 1    1     4.0       100.0
##                                                           
##                                 0   25   100.0       100.0
##                                                           
##                                 0   24    96.0        96.0
##                                 1    1     4.0       100.0
##                                                           
##                                 0   25   100.0       100.0
##                                 1    0     0.0       100.0
##                                                           
##                                 0   25   100.0       100.0
##                                                           
##                                 0   24    96.0        96.0
##                                 1    1     4.0       100.0
##                                                           
##                                 0   19    76.0        76.0
##                                 1    6    24.0       100.0
##                                                           
##                                 0   20    80.0        80.0
##                                 1    5    20.0       100.0
##                                                           
##                                 0   16    64.0        64.0
##                                 1    9    36.0       100.0
##                                                           
##                                 0   20    80.0        80.0
##                                 1    5    20.0       100.0
##                                                           
##                                 0   23    92.0        92.0
##                                 1    2     8.0       100.0
##                                                           
## 
## p-values
##                                      pApprox    pExact
## race                               0.3074905 0.4275466
## sex                                0.4953032 0.4904875
## Alcoholic Hepatitis                1.0000000 1.0000000
## Alcoholic Cirrhosis                0.2123469 0.1713637
## NAFLD/NASH                         0.2590605 0.1979409
## Primary Sclerosing Cholangitis     0.3704754 0.3322151
## Acute Viral Hepatitis              0.6007080 0.5709809
## Chronic Hepatitis B                0.5271112 0.2336449
## Chronic Hepatitis C                0.6007080 0.5709809
## Autoimmune                         0.4694777 0.5886832
## Wilson's Disease                   1.0000000 0.5538202
## Alpha-1 Antitrypsin                       NA        NA
## Hemachromatosis                    0.5271112 0.2336449
## Drug Induced Liver Injury or Toxin 1.0000000 1.0000000
## Budd Chiari                               NA        NA
## Cryptogenic                        1.0000000 1.0000000
## Malignancy                         0.9440592 0.7828238
## Other                              0.3063991 0.1774710
## Dialysis                           0.5266900 0.4513768
## Pressers                           0.1465607 0.1238754
## Mechanical Ventilation             1.0000000 0.6642869
## 
## Standardize mean differences
##                                        1 vs 2
## race                               0.45891730
## sex                                0.21130091
## Alcoholic Hepatitis                0.02568258
## Alcoholic Cirrhosis                0.34709743
## NAFLD/NASH                         0.31029007
## Primary Sclerosing Cholangitis     0.39735971
## Acute Viral Hepatitis              0.32025631
## Chronic Hepatitis B                0.28867513
## Chronic Hepatitis C                0.32025631
## Autoimmune                         0.36037499
## Wilson's Disease                   0.08851811
## Alpha-1 Antitrypsin                0.00000000
## Hemachromatosis                    0.28867513
## Drug Induced Liver Injury or Toxin 0.15713484
## Budd Chiari                        0.00000000
## Cryptogenic                        0.04264158
## Malignancy                         0.07849392
## Other                              0.29088216
## Dialysis                           0.19854168
## Pressers                           0.37578690
## Mechanical Ventilation             0.07437488
tableone_skewed <- c("age", "meld_transplant")

tab1_2 <- print(tab1_1, nonnormal = tableone_skewed, formatOptions = list(big.mark = ","))
##                                             Stratified by any_infection
##                                              0                   
##   n                                             82               
##   race (%)                                                       
##      American Indian or Alaska Native            1 (  1.2)       
##      Asian/Mideast Indian                        6 (  7.3)       
##      Black/African-American                      9 ( 11.0)       
##      More than one Race                          8 (  9.8)       
##      Patient Declined                            4 (  4.9)       
##      Unknown                                     0 (  0.0)       
##      White                                      54 ( 65.9)       
##   sex = Male (%)                                44 ( 53.7)       
##   age (median [IQR])                         55.00 [39.50, 63.00]
##   meld_transplant (median [IQR])             28.00 [19.00, 33.00]
##   Alcoholic Hepatitis = 1 (%)                    6 (  7.3)       
##   Alcoholic Cirrhosis = 1 (%)                   40 ( 48.8)       
##   NAFLD/NASH = 1 (%)                            10 ( 12.2)       
##   Primary Sclerosing Cholangitis = 1 (%)         6 (  7.3)       
##   Acute Viral Hepatitis = 1 (%)                  4 (  4.9)       
##   Chronic Hepatitis B = 1 (%)                    0 (  0.0)       
##   Chronic Hepatitis C = 1 (%)                    4 (  4.9)       
##   Autoimmune = 1 (%)                             5 (  6.1)       
##   Wilson's Disease = 1 (%)                       2 (  2.4)       
##   Alpha-1 Antitrypsin = 0 (%)                   82 (100.0)       
##   Hemachromatosis = 1 (%)                        0 (  0.0)       
##   Drug Induced Liver Injury or Toxin = 1 (%)     1 (  1.2)       
##   Budd Chiari = 0 (%)                           82 (100.0)       
##   Cryptogenic = 1 (%)                            4 (  4.9)       
##   Malignancy = 1 (%)                            17 ( 20.7)       
##   Other = 1 (%)                                  8 (  9.8)       
##   Dialysis = 1 (%)                              22 ( 26.8)       
##   Pressers = 1 (%)                               6 (  7.3)       
##   Mechanical Ventilation = 1 (%)                 5 (  6.1)       
##                                             Stratified by any_infection
##                                              1                    p     
##   n                                             25                      
##   race (%)                                                         0.307
##      American Indian or Alaska Native            0 (  0.0)              
##      Asian/Mideast Indian                        2 (  8.0)              
##      Black/African-American                      2 (  8.0)              
##      More than one Race                          2 (  8.0)              
##      Patient Declined                            1 (  4.0)              
##      Unknown                                     2 (  8.0)              
##      White                                      16 ( 64.0)              
##   sex = Male (%)                                16 ( 64.0)         0.495
##   age (median [IQR])                         59.00 [46.00, 68.00]  0.177
##   meld_transplant (median [IQR])             30.00 [19.00, 33.00]  0.609
##   Alcoholic Hepatitis = 1 (%)                    2 (  8.0)         1.000
##   Alcoholic Cirrhosis = 1 (%)                    8 ( 32.0)         0.212
##   NAFLD/NASH = 1 (%)                             6 ( 24.0)         0.259
##   Primary Sclerosing Cholangitis = 1 (%)         0 (  0.0)         0.370
##   Acute Viral Hepatitis = 1 (%)                  0 (  0.0)         0.601
##   Chronic Hepatitis B = 1 (%)                    1 (  4.0)         0.527
##   Chronic Hepatitis C = 1 (%)                    0 (  0.0)         0.601
##   Autoimmune = 1 (%)                             0 (  0.0)         0.469
##   Wilson's Disease = 1 (%)                       1 (  4.0)         1.000
##   Alpha-1 Antitrypsin = 0 (%)                   25 (100.0)            NA
##   Hemachromatosis = 1 (%)                        1 (  4.0)         0.527
##   Drug Induced Liver Injury or Toxin = 1 (%)     0 (  0.0)         1.000
##   Budd Chiari = 0 (%)                           25 (100.0)            NA
##   Cryptogenic = 1 (%)                            1 (  4.0)         1.000
##   Malignancy = 1 (%)                             6 ( 24.0)         0.944
##   Other = 1 (%)                                  5 ( 20.0)         0.306
##   Dialysis = 1 (%)                               9 ( 36.0)         0.527
##   Pressers = 1 (%)                               5 ( 20.0)         0.147
##   Mechanical Ventilation = 1 (%)                 2 (  8.0)         1.000
##                                             Stratified by any_infection
##                                              test   
##   n                                                 
##   race (%)                                          
##      American Indian or Alaska Native               
##      Asian/Mideast Indian                           
##      Black/African-American                         
##      More than one Race                             
##      Patient Declined                               
##      Unknown                                        
##      White                                          
##   sex = Male (%)                                    
##   age (median [IQR])                         nonnorm
##   meld_transplant (median [IQR])             nonnorm
##   Alcoholic Hepatitis = 1 (%)                       
##   Alcoholic Cirrhosis = 1 (%)                       
##   NAFLD/NASH = 1 (%)                                
##   Primary Sclerosing Cholangitis = 1 (%)            
##   Acute Viral Hepatitis = 1 (%)                     
##   Chronic Hepatitis B = 1 (%)                       
##   Chronic Hepatitis C = 1 (%)                       
##   Autoimmune = 1 (%)                                
##   Wilson's Disease = 1 (%)                          
##   Alpha-1 Antitrypsin = 0 (%)                       
##   Hemachromatosis = 1 (%)                           
##   Drug Induced Liver Injury or Toxin = 1 (%)        
##   Budd Chiari = 0 (%)                               
##   Cryptogenic = 1 (%)                               
##   Malignancy = 1 (%)                                
##   Other = 1 (%)                                     
##   Dialysis = 1 (%)                                  
##   Pressers = 1 (%)                                  
##   Mechanical Ventilation = 1 (%)
write.csv(tab1_2, "./Results/Demo_Table_1.csv", row.names = TRUE)  # Saving then reading in the same data allows for an easy way to adjust p-values, since it loads the object as a dataframe

# Need to adjust pvalues and arrange properly....hence the
# multiple dataframes below
tab1_2_padjust1 <- read.csv("./Results/Demo_Table_1.csv") %>%
    dplyr::rename(` ` = X, `No Infection` = X0, `Bacterial Infection` = X1)

tab1_2_padjust2 <- tab1_2_padjust1 %>%
    mutate(` ` = factor(` `, levels = tab1_2_padjust1$` `))

tab1_2_padjust3 <- tab1_2_padjust1 %>%
    mutate(test = ifelse(!is.na(p) & test == "", "chi.sq", test)) %>%
    group_by(test) %>%
    rstatix::adjust_pvalue(p.col = "p", method = "BH") %>%
    ungroup() %>%
    mutate(` ` = factor(` `, tab1_2_padjust2$` `)) %>%
    arrange(` `) %>%
    mutate(p = ifelse(is.na(p), "", p), p.adj = ifelse(is.na(p.adj),
        "", p.adj))

# Read in csv to then append adjusted pvalues
write.csv(tab1_2_padjust3, "./Results/Demo_Table_1_padjust.csv",
    row.names = FALSE)

Culture: Supplemental Table 1

# Percentage of infections # where there is an infection
# within 30 days after transplant and not 0 days before
cult_percent_infx_all <- peri_criteria_all %>%
    filter(bact_infection_present == "Yes", between(eday, 0,
        30)) %>%
    group_by(patientID, sampleID, eday) %>%
    arrange(-infx_stool) %>%
    dplyr::slice(1) %>%
    ungroup() %>%
    select(patientID, sampleID, bact_infection_present, infx_stool,
        organism1, micro1.factor) %>%
    distinct() %>%
    mutate(organism1 = gsub(x = organism1, pattern = "\\s+",
        replacement = ""), organism1 = str_to_lower(string = organism1),
        organism1 = ifelse(grepl(x = organism1, pattern = "enterococcus|enterobacterales|klebsiella|escherichia|citrobacter|proteus|staphyl|clostrid|pseudo|steno|bacteroides|helico"),
            organism1, "Culture Negative")) %>%
    group_by(patientID, sampleID, infx_stool) %>%
    mutate(`Enterococcus faecium` = grepl(x = organism1, pattern = "enterococcusfaecium",
        ignore.case = T), `Enterococcus faecalis` = grepl(x = organism1,
        pattern = "enterococcusfaecalis", ignore.case = T), `Enterococcus avium` = grepl(x = organism1,
        pattern = "enterococcusavium", ignore.case = T), `Enterococcus gallinarum` = grepl(x = organism1,
        pattern = "enterococcusgallinarum", ignore.case = T),
        `Klebsiella pneumoniae` = grepl(x = organism1, pattern = "klebsiellapneumoniae",
            ignore.case = T), `Enterobacter cloaceae` = grepl(x = organism1,
            pattern = "enterobactercloaceae", ignore.case = T),
        `Escherichia coli` = grepl(x = organism1, pattern = "escherichiacoli",
            ignore.case = T), `Citrobacter freundii` = grepl(x = organism1,
            pattern = "citrobacterfreundii", ignore.case = T),
        `Proteus mirabilis` = grepl(x = organism1, pattern = "proteusmirabilis",
            ignore.case = T), `Staphylococcus aureus` = grepl(x = organism1,
            pattern = "staphylococcusaureus", ignore.case = T),
        `Staphylococcus epidermis` = grepl(x = organism1, pattern = "staphylococcusepidermidis",
            ignore.case = T), `Pseudomonas aeruginosa` = grepl(x = organism1,
            pattern = "pseudomonasaeruginosa", ignore.case = T),
        `Stenotrophmonas maltophilia` = grepl(x = organism1,
            pattern = "stenotrophmonasmaltophilia", ignore.case = T),
        `Helicobacter pylori` = grepl(x = organism1, pattern = "helicobacterpylori",
            ignore.case = T), `Clostridium difficile` = grepl(x = organism1,
            pattern = "clostridiumdifficile|clostridioidesdifficile",
            ignore.case = T), `Bacteroides sp.` = grepl(x = organism1,
            pattern = "bacteroides", ignore.case = T), `Culture Negative` = grepl(x = organism1,
            pattern = "Culture Negative", ignore.case = T)) %>%
    pivot_longer(-c(patientID:micro1.factor), names_to = "organisms",
        values_to = "org_presence") %>%
    mutate(organisms = ifelse(bact_infection_present == "No",
        "No Bacterial Infection", organisms), org_presence = ifelse(org_presence ==
        TRUE, 1, 0)) %>%
    group_by(sampleID, infx_stool, bact_infection_present, organisms) %>%
    dplyr::slice_max(org_presence) %>%
    ungroup() %>%
    filter(organisms != "No Bacterial Infection") %>%
    group_by(patientID, bact_infection_present, organisms) %>%
    dplyr::slice_max(org_presence) %>%
    ungroup() %>%
    filter(org_presence == 1) %>%
    group_by(patientID, infx_stool, organisms) %>%
    dplyr::slice(1) %>%
    group_by(organisms) %>%
    tally() %>%
    ungroup() %>%
    mutate(total_infections = sum(n)) %>%
    replace_na(list(group = "unknown")) %>%
    group_by(organisms) %>%
    dplyr::summarize(percent = sum(n)/total_infections * 100,
        count = sum(n))

# Location of infections # where there is an infection
# within 30 days after transplant and not 0 days before
cult_location_infx_all <- peri_criteria_all %>%
    filter(bact_infection_present == "Yes", between(eday, 0,
        30)) %>%
    group_by(patientID, eday, key) %>%
    arrange(-infx_stool) %>%
    dplyr::slice(1) %>%
    ungroup() %>%
    select(patientID, bact_infection_present, infx_stool, organism1,
        key) %>%
    distinct() %>%
    mutate(organism1 = gsub(x = organism1, pattern = "\\s+",
        replacement = ""), organism1 = str_to_lower(string = organism1),
        organism1 = ifelse(grepl(x = organism1, pattern = "enterococcus|enterobacterales|klebsiella|escherichia|citrobacter|proteus|staphyl|clostrid|pseudo|steno|bacteroides|helico"),
            organism1, "Culture Negative")) %>%
    group_by(patientID, infx_stool) %>%
    mutate(`Enterococcus faecium` = grepl(x = organism1, pattern = "enterococcusfaecium",
        ignore.case = T), `Enterococcus faecalis` = grepl(x = organism1,
        pattern = "enterococcusfaecalis", ignore.case = T), `Enterococcus avium` = grepl(x = organism1,
        pattern = "enterococcusavium", ignore.case = T), `Enterococcus gallinarum` = grepl(x = organism1,
        pattern = "enterococcusgallinarum", ignore.case = T),
        `Klebsiella pneumoniae` = grepl(x = organism1, pattern = "klebsiellapneumoniae",
            ignore.case = T), `Enterobacter cloaceae` = grepl(x = organism1,
            pattern = "enterobactercloaceae", ignore.case = T),
        `Escherichia coli` = grepl(x = organism1, pattern = "escherichiacoli",
            ignore.case = T), `Citrobacter freundii` = grepl(x = organism1,
            pattern = "citrobacterfreundii", ignore.case = T),
        `Proteus mirabilis` = grepl(x = organism1, pattern = "proteusmirabilis",
            ignore.case = T), `Staphylococcus aureus` = grepl(x = organism1,
            pattern = "staphylococcusaureus", ignore.case = T),
        `Staphylococcus epidermis` = grepl(x = organism1, pattern = "staphylococcusepidermidis",
            ignore.case = T), `Pseudomonas aeruginosa` = grepl(x = organism1,
            pattern = "pseudomonasaeruginosa", ignore.case = T),
        `Stenotrophmonas maltophilia` = grepl(x = organism1,
            pattern = "stenotrophmonasmaltophilia", ignore.case = T),
        `Helicobacter pylori` = grepl(x = organism1, pattern = "helicobacterpylori",
            ignore.case = T), `Clostridium difficile` = grepl(x = organism1,
            pattern = "clostridiumdifficile|clostridioidesdifficile",
            ignore.case = T), `Bacteroides sp.` = grepl(x = organism1,
            pattern = "bacteroides", ignore.case = T), `Culture Negative` = grepl(x = organism1,
            pattern = "Culture Negative", ignore.case = T)) %>%
    pivot_longer(-c(patientID:key), names_to = "organisms", values_to = "org_presence") %>%
    mutate(organisms = ifelse(bact_infection_present == "No",
        "No Bacterial Infection", organisms), org_presence = ifelse(org_presence ==
        TRUE, 1, 0)) %>%
    group_by(patientID, key, infx_stool, bact_infection_present) %>%
    dplyr::slice_max(org_presence) %>%
    ungroup() %>%
    filter(org_presence == 1) %>%
    filter(organisms != "No Bacterial Infection") %>%
    group_by(patientID, infx_stool, key) %>%
    dplyr::slice(1) %>%
    group_by(key) %>%
    tally() %>%
    ungroup() %>%
    mutate(total_infections = sum(n)) %>%
    replace_na(list(group = "unknown")) %>%
    group_by(key) %>%
    dplyr::summarize(percent = sum(n)/total_infections * 100,
        count = sum(n))

# Type of Infection # where there is an infection within 30
# days after transplant and not 0 days before
cult_type_infx_all <- peri_criteria_all %>%
    filter(bact_infection_present == "Yes", grepl(variable, pattern = "anatomy.+"),
        between(eday, 0, 30)) %>%
    group_by(patientID, eday, diag_cat2) %>%
    arrange(-infx_stool) %>%
    dplyr::slice(1) %>%
    ungroup() %>%
    mutate(diag_cat3 = case_when(grepl("abdominal", diag_cat2,
        ignore.case = T) ~ "Abdominal", grepl("vir|bronchitis|covid-19|cmv",
        diag_cat2, ignore.case = T) ~ "Viral", grepl("bacteremia",
        diag_cat2, ignore.case = T) ~ "Bacteremia", grepl("thrush",
        diag_cat2, ignore.case = T) ~ "Fungal", grepl("cholangitis|empyema|panniculitis|peritonitis|latent tb",
        diag_cat2, ignore.case = T) ~ "Bacterial", grepl("cystitis|pyelonephritis",
        diag_cat2, ignore.case = T) ~ "Urinary Tract Infection",
        grepl("pneumonia", diag_cat2, ignore.case = T) ~ "Pneumonia",
        TRUE ~ diag_cat2), diag_cat3 = str_to_title(diag_cat3)) %>%
    ungroup() %>%
    select(patientID, bact_infection_present, infx_stool, organism1,
        micro1.factor, key, diag_cat3) %>%
    distinct() %>%
    mutate(organism1 = gsub(x = organism1, pattern = "\\s+",
        replacement = ""), organism1 = str_to_lower(string = organism1),
        organism1 = ifelse(grepl(x = organism1, pattern = "enterococcus|enterobacterales|klebsiella|escherichia|citrobacter|proteus|staphyl|clostrid|pseudo|steno|bacteroides|helico"),
            organism1, "Culture Negative")) %>%
    group_by(patientID, infx_stool) %>%
    mutate(`Enterococcus faecium` = grepl(x = organism1, pattern = "enterococcusfaecium",
        ignore.case = T), `Enterococcus faecalis` = grepl(x = organism1,
        pattern = "enterococcusfaecalis", ignore.case = T), `Enterococcus avium` = grepl(x = organism1,
        pattern = "enterococcusavium", ignore.case = T), `Enterococcus gallinarum` = grepl(x = organism1,
        pattern = "enterococcusgallinarum", ignore.case = T),
        `Klebsiella pneumoniae` = grepl(x = organism1, pattern = "klebsiellapneumoniae",
            ignore.case = T), `Enterobacter cloaceae` = grepl(x = organism1,
            pattern = "enterobactercloaceae", ignore.case = T),
        `Escherichia coli` = grepl(x = organism1, pattern = "escherichiacoli",
            ignore.case = T), `Citrobacter freundii` = grepl(x = organism1,
            pattern = "citrobacterfreundii", ignore.case = T),
        `Proteus mirabilis` = grepl(x = organism1, pattern = "proteusmirabilis",
            ignore.case = T), `Staphylococcus aureus` = grepl(x = organism1,
            pattern = "staphylococcusaureus", ignore.case = T),
        `Staphylococcus epidermis` = grepl(x = organism1, pattern = "staphylococcusepidermidis",
            ignore.case = T), `Pseudomonas aeruginosa` = grepl(x = organism1,
            pattern = "pseudomonasaeruginosa", ignore.case = T),
        `Stenotrophmonas maltophilia` = grepl(x = organism1,
            pattern = "stenotrophmonasmaltophilia", ignore.case = T),
        `Helicobacter pylori` = grepl(x = organism1, pattern = "helicobacterpylori",
            ignore.case = T), `Clostridium difficile` = grepl(x = organism1,
            pattern = "clostridiumdifficile|clostridioidesdifficile",
            ignore.case = T), `Bacteroides sp.` = grepl(x = organism1,
            pattern = "bacteroides", ignore.case = T), `Culture Negative` = grepl(x = organism1,
            pattern = "Culture Negative", ignore.case = T)) %>%
    pivot_longer(-c(patientID:diag_cat3), names_to = "organisms",
        values_to = "org_presence") %>%
    mutate(organisms = ifelse(bact_infection_present == "No",
        "No Bacterial Infection", organisms), org_presence = ifelse(org_presence ==
        TRUE, 1, 0)) %>%
    group_by(patientID, diag_cat3, infx_stool, bact_infection_present) %>%
    dplyr::slice_max(org_presence) %>%
    ungroup() %>%
    filter(org_presence == 1, organisms != "No Bacterial Infection") %>%
    ungroup() %>%
    group_by(patientID, infx_stool, diag_cat3) %>%
    dplyr::slice(1) %>%
    group_by(diag_cat3) %>%
    tally() %>%
    ungroup() %>%
    mutate(total_infections = sum(n)) %>%
    group_by(diag_cat3) %>%
    dplyr::summarize(percent = sum(n)/total_infections * 100,
        count = sum(n))

culture_tot <- bind_rows(cult_percent_infx_all, cult_location_infx_all,
    cult_type_infx_all) %>%
    mutate(percent = round(percent, 2))

# Read in csv to then append adjusted pvalues
write.csv(culture_tot, "./Results/Supplemental_Table_1.csv",
    row.names = FALSE)

Antibiotics

## Vector of variables to summarize
abx_vars <- c("Basiliximab", "Mycophenolate", "Steroid", "Systemic Vancomycin",
    "Tacrolimus", "Cefepime", "Metronidazole", "Piperacillin/Tazobactam",
    "Rifaximin", "Ceftriaxone", "Ciprofloxacin", "Gentamicin",
    "Tobramicin", "Daptomycin", "Meropenem", "Oral Vancomycin")

abx2 <- abx %>%
    filter(grepl(pattern = "basilix|tacro|steroid|mycophenolate|lactulose",
        medication_name, ignore.case = T) | grepl(pattern = "GLUCOCORTICOIDS|steroid",
        med_pharm_class, ignore.case = T) | grepl(pattern = "steroid",
        med_pharm_sub_class, ignore.case = T) | grepl("given",
        mar_action, ignore.case = T)) %>%
    mutate(Immunosuppressants = case_when(grepl("basilix", medication_name,
        ignore.case = T) ~ "Basiliximab", grepl("tacro", medication_name,
        ignore.case = T) ~ "Tacrolimus", grepl("one|ide|solu-cortef",
        medication_name, ignore.case = T) ~ "Steroid", grepl("mycophenolate",
        medication_name, ignore.case = T) ~ "Mycophenolate",
        grepl("lactulose", medication_name, ignore.case = T) ~
            "Lactulose"), Antibiotics = case_when(grepl("rifaximin",
        medication_name, ignore.case = T) ~ "Rifaximin", grepl("lactulose",
        medication_name, ignore.case = T) ~ "Lactulose", grepl("ceftriaxone",
        medication_name, ignore.case = T) ~ "Ceftriaxone", grepl("piperacillin|tazobactam",
        medication_name, ignore.case = T) ~ "Piperacillin/Tazobactam",
        grepl("cefepime", medication_name, ignore.case = T) ~
            "Cefepime", grepl("meropenem", medication_name, ignore.case = T) ~
            "Meropenem", grepl("gentamicin", medication_name,
            ignore.case = T) ~ "Gentamicin", grepl("tobramycin",
            medication_name, ignore.case = T) ~ "Tobramicin",
        grepl("vancomycin.+oral", medication_name, ignore.case = T) ~
            "Oral Vancomycin", grepl("vancomycin.+(IV|Intravenous)",
            medication_name, ignore.case = T) ~ "Systemic Vancomycin",
        grepl("METRONIDAZOLE", medication_name, ignore.case = T) ~
            "Metronidazole", grepl("DAPTOMYCIN", medication_name,
            ignore.case = T) ~ "Daptomycin", grepl("linezolid",
            medication_name, ignore.case = T) ~ "Linezolid",
        grepl("fluconazole", medication_name, ignore.case = T) ~
            "Fluconazole", grepl("micafungin", medication_name,
            ignore.case = T) ~ "Micafungin", grepl("cipro", medication_name,
            ignore.case = T) & !grepl("drop", dose_units, ignore.case = T) ~
            "Ciprofloxacin"), action = case_when(!is.na(Immunosuppressants) &
        between(days_transplant, 0, 30) | !is.na(Immunosuppressants) &
        ordering_mode == "Outpatient" ~ "keep", !is.na(Antibiotics) &
        between(days_transplant, -7, 1) ~ "keep", TRUE ~ "remove")) %>%
    group_by(patientID, Immunosuppressants, Antibiotics) %>%
    arrange(days_transplant) %>%
    filter(action == "keep") %>%
    dplyr::slice(1) %>%
    select(patientID, Immunosuppressants, Antibiotics) %>%
    left_join(peri_matrix_all %>%
        select(patientID, any_infection)) %>%
    pivot_longer(!c(patientID, any_infection), names_to = "variable",
        values_to = "value") %>%
    drop_na(value) %>%
    mutate(variable = 1) %>%
    pivot_wider(c(patientID, any_infection), names_from = "value",
        values_from = "variable", values_fn = min) %>%
    replace(is.na(.), 0)

abx_tab1_1 <- CreateTableOne(vars = abx_vars, testNonNormal = "wilcox.test",
    includeNA = FALSE, factorVars = abx_vars, strata = "any_infection",
    data = abx2)

summary(abx_tab1_1)
## 
##      ### Summary of categorical variables ### 
## 
## any_infection: 0
##                      var  n miss p.miss level freq percent cum.percent
##              Basiliximab 82    0    0.0     0   28    34.1        34.1
##                                             1   54    65.9       100.0
##                                                                       
##            Mycophenolate 82    0    0.0     0   10    12.2        12.2
##                                             1   72    87.8       100.0
##                                                                       
##                  Steroid 82    0    0.0     1   82   100.0       100.0
##                                                                       
##      Systemic Vancomycin 82    0    0.0     0   40    48.8        48.8
##                                             1   42    51.2       100.0
##                                                                       
##               Tacrolimus 82    0    0.0     1   82   100.0       100.0
##                                                                       
##                 Cefepime 82    0    0.0     0   56    68.3        68.3
##                                             1   26    31.7       100.0
##                                                                       
##            Metronidazole 82    0    0.0     0   48    58.5        58.5
##                                             1   34    41.5       100.0
##                                                                       
##  Piperacillin/Tazobactam 82    0    0.0     0   12    14.6        14.6
##                                             1   70    85.4       100.0
##                                                                       
##                Rifaximin 82    0    0.0     0   45    54.9        54.9
##                                             1   37    45.1       100.0
##                                                                       
##              Ceftriaxone 82    0    0.0     0   66    80.5        80.5
##                                             1   16    19.5       100.0
##                                                                       
##            Ciprofloxacin 82    0    0.0     0   67    81.7        81.7
##                                             1   15    18.3       100.0
##                                                                       
##               Gentamicin 82    0    0.0     0   81    98.8        98.8
##                                             1    1     1.2       100.0
##                                                                       
##               Tobramicin 82    0    0.0     0   78    95.1        95.1
##                                             1    4     4.9       100.0
##                                                                       
##               Daptomycin 82    0    0.0     0   81    98.8        98.8
##                                             1    1     1.2       100.0
##                                                                       
##                Meropenem 82    0    0.0     0   78    95.1        95.1
##                                             1    4     4.9       100.0
##                                                                       
##          Oral Vancomycin 82    0    0.0     0   81    98.8        98.8
##                                             1    1     1.2       100.0
##                                                                       
## ------------------------------------------------------------ 
## any_infection: 1
##                      var  n miss p.miss level freq percent cum.percent
##              Basiliximab 25    0    0.0     0    7    28.0        28.0
##                                             1   18    72.0       100.0
##                                                                       
##            Mycophenolate 25    0    0.0     0    1     4.0         4.0
##                                             1   24    96.0       100.0
##                                                                       
##                  Steroid 25    0    0.0     1   25   100.0       100.0
##                                                                       
##      Systemic Vancomycin 25    0    0.0     0    8    32.0        32.0
##                                             1   17    68.0       100.0
##                                                                       
##               Tacrolimus 25    0    0.0     1   25   100.0       100.0
##                                                                       
##                 Cefepime 25    0    0.0     0   14    56.0        56.0
##                                             1   11    44.0       100.0
##                                                                       
##            Metronidazole 25    0    0.0     0   13    52.0        52.0
##                                             1   12    48.0       100.0
##                                                                       
##  Piperacillin/Tazobactam 25    0    0.0     0    2     8.0         8.0
##                                             1   23    92.0       100.0
##                                                                       
##                Rifaximin 25    0    0.0     0   13    52.0        52.0
##                                             1   12    48.0       100.0
##                                                                       
##              Ceftriaxone 25    0    0.0     0   18    72.0        72.0
##                                             1    7    28.0       100.0
##                                                                       
##            Ciprofloxacin 25    0    0.0     0   21    84.0        84.0
##                                             1    4    16.0       100.0
##                                                                       
##               Gentamicin 25    0    0.0     0   24    96.0        96.0
##                                             1    1     4.0       100.0
##                                                                       
##               Tobramicin 25    0    0.0     0   22    88.0        88.0
##                                             1    3    12.0       100.0
##                                                                       
##               Daptomycin 25    0    0.0     0   22    88.0        88.0
##                                             1    3    12.0       100.0
##                                                                       
##                Meropenem 25    0    0.0     0   24    96.0        96.0
##                                             1    1     4.0       100.0
##                                                                       
##          Oral Vancomycin 25    0    0.0     0   24    96.0        96.0
##                                             1    1     4.0       100.0
##                                                                       
## 
## p-values
##                            pApprox     pExact
## Basiliximab             0.74143512 0.63341955
## Mycophenolate           0.42082759 0.45169519
## Steroid                         NA         NA
## Systemic Vancomycin     0.21234695 0.17136371
## Tacrolimus                      NA         NA
## Cefepime                0.37287647 0.33702201
## Metronidazole           0.72844870 0.64658181
## Piperacillin/Tazobactam 0.60142514 0.51269562
## Rifaximin               0.98119534 0.82233980
## Ceftriaxone             0.53110302 0.40817712
## Ciprofloxacin           1.00000000 1.00000000
## Gentamicin              0.95599591 0.41438900
## Tobramicin              0.42443880 0.35037337
## Daptomycin              0.05938894 0.03899733
## Meropenem               1.00000000 1.00000000
## Oral Vancomycin         0.95599591 0.41438900
## 
## Standardize mean differences
##                             1 vs 2
## Basiliximab             0.13310348
## Mycophenolate           0.30385759
## Steroid                 0.00000000
## Systemic Vancomycin     0.34709743
## Tacrolimus              0.00000000
## Cefepime                0.25550557
## Metronidazole           0.13174842
## Piperacillin/Tazobactam 0.21056770
## Rifaximin               0.05772164
## Ceftriaxone             0.20043582
## Ciprofloxacin           0.06085600
## Gentamicin              0.17507370
## Tobramicin              0.25833951
## Daptomycin              0.44449214
## Meropenem               0.04264158
## Oral Vancomycin         0.17507370
abx_tab1_2 <- print(abx_tab1_1, formatOptions = list(big.mark = ","))
##                                  Stratified by any_infection
##                                   0           1           p      test
##   n                               82          25                     
##   Basiliximab = 1 (%)             54 ( 65.9)  18 ( 72.0)   0.741     
##   Mycophenolate = 1 (%)           72 ( 87.8)  24 ( 96.0)   0.421     
##   Steroid = 1 (%)                 82 (100.0)  25 (100.0)      NA     
##   Systemic Vancomycin = 1 (%)     42 ( 51.2)  17 ( 68.0)   0.212     
##   Tacrolimus = 1 (%)              82 (100.0)  25 (100.0)      NA     
##   Cefepime = 1 (%)                26 ( 31.7)  11 ( 44.0)   0.373     
##   Metronidazole = 1 (%)           34 ( 41.5)  12 ( 48.0)   0.728     
##   Piperacillin/Tazobactam = 1 (%) 70 ( 85.4)  23 ( 92.0)   0.601     
##   Rifaximin = 1 (%)               37 ( 45.1)  12 ( 48.0)   0.981     
##   Ceftriaxone = 1 (%)             16 ( 19.5)   7 ( 28.0)   0.531     
##   Ciprofloxacin = 1 (%)           15 ( 18.3)   4 ( 16.0)   1.000     
##   Gentamicin = 1 (%)               1 (  1.2)   1 (  4.0)   0.956     
##   Tobramicin = 1 (%)               4 (  4.9)   3 ( 12.0)   0.424     
##   Daptomycin = 1 (%)               1 (  1.2)   3 ( 12.0)   0.059     
##   Meropenem = 1 (%)                4 (  4.9)   1 (  4.0)   1.000     
##   Oral Vancomycin = 1 (%)          1 (  1.2)   1 (  4.0)   0.956
write.csv(abx_tab1_2, "./Results/ABX_Table_1.csv", row.names = TRUE)  # Saving then reading in the same data allows for an easy way to adjust p-values, since it loads the object as a dataframe

# Need to adjust pvalues and arrange properly....hence the
# multiple dataframes below
abx_tab1_2_padjust1 <- read.csv("./Results/ABX_Table_1.csv") %>%
    dplyr::rename(` ` = X, `No Infection` = X0, `Bacterial Infection` = X1)

abx_tab1_2_padjust2 <- abx_tab1_2_padjust1 %>%
    mutate(` ` = factor(` `, levels = abx_tab1_2_padjust1$` `))

abx_tab1_2_padjust3 <- abx_tab1_2_padjust1 %>%
    mutate(test = ifelse(!is.na(p) & is.na(test), "chi.sq", "")) %>%
    group_by(test) %>%
    rstatix::adjust_pvalue(p.col = "p", method = "BH") %>%
    ungroup() %>%
    mutate(` ` = factor(` `, abx_tab1_2_padjust2$` `)) %>%
    arrange(` `) %>%
    mutate(p = ifelse(is.na(p), "", p), p.adj = ifelse(is.na(p.adj),
        "", p.adj))

# Read in csv to then append adjusted pvalues
write.csv(abx_tab1_2_padjust3, "./Results/ABX_Table_1_padjust.csv",
    row.names = FALSE)

Miscellaneous Statistics: Supplemental Table 2

# Expansions and Infections Stats
expan_infx_stats <- peri_matrix_all %>%
    left_join(peri_matrix_all %>%
        select(sampleID, ends_with("infection")) %>%
        ungroup()) %>%
    mutate(ecoc_infx = enterococcus_infection, ecoc_infx = ifelse(ecoc_infx >=
        1, 1, 0), ebac_infx = enterobacterales_infection, ebac_infx = ifelse(ebac_infx >=
        1, 1, 0)) %>%
    select(enterococcus_rel_abundance, enterobacterales_rel_abundance,
        ecoc_infx, ebac_infx) %>%
    summarise(ecoc_expan_above_cutpoint = sum(enterococcus_rel_abundance >=
        optimal_cutpoint_rel$optimal_cutpoint[2]), ecoc_infx_ecoc_below_cutpoint = sum(ecoc_infx ==
        1 & enterococcus_rel_abundance < optimal_cutpoint_rel$optimal_cutpoint[2],
        na.rm = T), ecoc_infx_ecoc_above_cutpoint = sum(ecoc_infx ==
        1 & enterococcus_rel_abundance >= optimal_cutpoint_rel$optimal_cutpoint[2],
        na.rm = T), ecoc_noinfx_ecoc_below_cutpoint = sum(ecoc_infx ==
        0 & enterococcus_rel_abundance < optimal_cutpoint_rel$optimal_cutpoint[2],
        na.rm = T), ecoc_noinfx_ecoc_above_cutpoint = sum(ecoc_infx ==
        0 & enterococcus_rel_abundance >= optimal_cutpoint_rel$optimal_cutpoint[2],
        na.rm = T), ebac_expan_above_cutpoint = sum(enterobacterales_rel_abundance >=
        optimal_cutpoint_rel$optimal_cutpoint[1]), ebac_infx_ebac_below_cutpoint = sum(ebac_infx ==
        1 & enterobacterales_rel_abundance < optimal_cutpoint_rel$optimal_cutpoint[1],
        na.rm = T), ebac_infx_ebac_above_cutpoint = sum(ebac_infx ==
        1 & enterobacterales_rel_abundance >= optimal_cutpoint_rel$optimal_cutpoint[1],
        na.rm = T), ebac_noinfx_ebac_below_cutpoint = sum(ebac_infx ==
        0 & enterobacterales_rel_abundance < optimal_cutpoint_rel$optimal_cutpoint[1],
        na.rm = T), ebac_noinfx_ebac_above_cutpoint = sum(ebac_infx ==
        0 & enterobacterales_rel_abundance >= optimal_cutpoint_rel$optimal_cutpoint[1],
        na.rm = T)) %>%
    rownames_to_column(var = "rowname") %>%
    pivot_longer(-rowname, names_to = "metric", values_to = "count") %>%
    mutate(percent = round((count/107) * 100, 2)) %>%
    select(-rowname)

ecoc_infx_confusion <- expan_infx_stats %>%
    filter(metric %in% c("ecoc_infx_ecoc_below_cutpoint", "ecoc_infx_ecoc_above_cutpoint",
        "ecoc_noinfx_ecoc_below_cutpoint", "ecoc_noinfx_ecoc_above_cutpoint")) %>%
    select(metric, count) %>%
    pivot_wider(names_from = metric, values_from = count)

ecoc_infx_confusion_cnfs <- data.frame(Enterococcus = c("Infection",
    "No Infection"), `Greater Than Cutpoint` = c(ecoc_infx_confusion$ecoc_infx_ecoc_above_cutpoint,
    ecoc_infx_confusion$ecoc_noinfx_ecoc_above_cutpoint), `Less Than Cutpoint` = c(ecoc_infx_confusion$ecoc_infx_ecoc_below_cutpoint,
    ecoc_infx_confusion$ecoc_noinfx_ecoc_below_cutpoint))

ecoc_infx_confusion_cnfs <- ecoc_infx_confusion_cnfs %>%
    mutate(sensitivity = round(ecoc_infx_confusion_cnfs[1, 2]/(ecoc_infx_confusion_cnfs[1,
        2] + ecoc_infx_confusion_cnfs[1, 3]), 3), specificity = round(ecoc_infx_confusion_cnfs[2,
        3]/(ecoc_infx_confusion_cnfs[2, 3] + ecoc_infx_confusion_cnfs[2,
        2]), 3), odds_ratio = round((ecoc_infx_confusion_cnfs[1,
        2]/ecoc_infx_confusion_cnfs[1, 3])/(ecoc_infx_confusion_cnfs[2,
        2]/ecoc_infx_confusion_cnfs[2, 3]), 3))

write.csv(ecoc_infx_confusion_cnfs, "./Results/Supplemental_Table_2.csv",
    row.names = FALSE)


ebac_infx_confusion <- expan_infx_stats %>%
    filter(metric %in% c("ebac_infx_ebac_below_cutpoint", "ebac_infx_ebac_above_cutpoint",
        "ebac_noinfx_ebac_below_cutpoint", "ebac_noinfx_ebac_above_cutpoint")) %>%
    select(metric, count) %>%
    pivot_wider(names_from = metric, values_from = count)

ebac_infx_confusion_cnfs <- data.frame(Enterobacterales = c("Infection",
    "No Infection"), `Greater Than Cutpoint` = c(ebac_infx_confusion$ebac_infx_ebac_above_cutpoint,
    ebac_infx_confusion$ebac_noinfx_ebac_above_cutpoint), `Less Than Cutpoint` = c(ebac_infx_confusion$ebac_infx_ebac_below_cutpoint,
    ebac_infx_confusion$ebac_noinfx_ebac_below_cutpoint))

ebac_infx_confusion_cnfs <- ebac_infx_confusion_cnfs %>%
    mutate(sensitivity = round(ebac_infx_confusion_cnfs[1, 2]/(ebac_infx_confusion_cnfs[1,
        2] + ebac_infx_confusion_cnfs[1, 3]), 3), specificity = round(ebac_infx_confusion_cnfs[2,
        3]/(ebac_infx_confusion_cnfs[2, 3] + ebac_infx_confusion_cnfs[2,
        2]), 3), odds_ratio = round((ebac_infx_confusion_cnfs[1,
        2]/ebac_infx_confusion_cnfs[1, 3])/(ebac_infx_confusion_cnfs[2,
        2]/ebac_infx_confusion_cnfs[2, 3]), 3))

write.csv(ebac_infx_confusion_cnfs, "./Results/Supplemental_Table_3.csv",
    row.names = FALSE)

Supplemental Figure 4: Fecal Sample Collected from Transplant

sample_days <- metaphlan_df_sumry %>%
    group_by(sampleID) %>%
    slice(1) %>%
    filter(db == "Liver Transplant") %>%
    select(sampleID, diversity_group) %>%
    left_join(sample_lookup %>%
        select(sampleID, sample_days_from_transplant)) %>%
    ungroup()


# Quartiles of samples
clin_quartiles <- sample_days %>%
    select(sample_days_from_transplant) %>%
    summarise(minimum = min(sample_days_from_transplant, na.rm = T),
        lower = unname(quantile(sample_days_from_transplant,
            probs = 0.25, na.rm = T)), median = unname(quantile(sample_days_from_transplant,
            probs = 0.5, na.rm = T)), upper = unname(quantile(sample_days_from_transplant,
            probs = 0.75, na.rm = T)), maximum = max(sample_days_from_transplant,
            na.rm = T)) %>%
    mutate(placeholder = 1) %>%
    pivot_longer(!placeholder, names_to = "ranges", values_to = "value")



sample_days_histogram <- sample_days %>%
    ggplot(., aes(x = sample_days_from_transplant)) + geom_histogram(color = "black",
    fill = "grey", binwidth = 1) + geom_vline(aes(xintercept = 0),
    color = "orangered", linetype = "solid", size = 1.2) + geom_vline(aes(xintercept = median(sample_days_from_transplant,
    na.rm = TRUE)), color = "cyan2", linetype = "dashed", size = 1.2) +
    geom_text(aes(x = median(sample_days_from_transplant, na.rm = TRUE),
        y = 12.5), color = "cyan2", label = paste0("Median = ",
        clin_quartiles %>%
            filter(ranges == "median") %>%
            select(value), " days"), nudge_x = 4) + geom_text(label = "Transplant Date = Day 0",
    x = -4, y = 12.5, color = "orangered") + theme_bw() + theme(panel.grid = element_blank(),
    axis.text = element_text(color = "black", size = 12), axis.title = element_text(color = "black",
        size = 14), legend.position = "none") + scale_x_continuous(breaks = seq(-7,
    30, 5), limits = c(-7, 35)) + ylab("Number of Samples\n")


sample_days_boxplot <- sample_days %>%
    ggplot(., aes(x = sample_days_from_transplant, y = diversity_group,
        fill = diversity_group)) + geom_vline(aes(xintercept = 0),
    color = "orangered", linetype = "solid", size = 1.2) + geom_vline(aes(xintercept = median(sample_days_from_transplant,
    na.rm = TRUE)), color = "cyan2", linetype = "dashed", size = 1.2) +
    geom_violin() + geom_boxplot(alpha = 0.5, width = 0.15, outlier.shape = NA,
    fill = "white", color = "white") + stat_compare_means(comparisons = list(c("High Diversity",
    "Medium Diversity"), c("High Diversity", "Low Diversity"),
    c("Medium Diversity", "Low Diversity")), bracket.size = 0.3,
    step.increase = 0.07) + theme_bw() + theme(panel.grid = element_blank(),
    axis.text.x = element_text(color = "black", size = 12), axis.text.y = element_text(color = "black",
        size = 12), axis.ticks.y = element_blank(), axis.title = element_text(color = "black",
        size = 14), legend.position = "none") + scale_x_continuous(breaks = seq(-7,
    30, 5), limits = c(-7, 35)) + scale_fill_manual(values = c("#3A001E",
    "#8A0246", "#C20463")) + ylab("Diversity Groups\n") + xlab("\nDays to Sample")
## [1] FALSE
pdf(file = "./Results/Supplemental_Figure_4.pdf", height = 10,
    width = 12, onefile = F)

gg.stack(sample_days_histogram, sample_days_boxplot)

dev.off()
## quartz_off_screen 
##                 2
gg.stack(sample_days_histogram, sample_days_boxplot)

Save Data Image

save.image(file = "./Data/LT_Modeling.RData")